Loop Id: 2672 | Module: libgromacs_mpi.so.9.0.0 | Source: simd_prune_kernel.cpp:144-204 [...] | Coverage: 0.42% |
---|
Loop Id: 2672 | Module: libgromacs_mpi.so.9.0.0 | Source: simd_prune_kernel.cpp:144-204 [...] | Coverage: 0.42% |
---|
0x66f1c0 MOVSXD (%RAX,%R15,8),%R12 [3] |
0x66f1c4 SAL $0x3,%R12 |
0x66f1c8 LEA (%R12,%R12,2),%R12 |
0x66f1cc VBROADCASTF64X4 (%R8,%R12,4),%ZMM7 [1] |
0x66f1d3 MOVSXD %R12D,%R12 |
0x66f1d6 VBROADCASTF64X4 0x20(%R8,%R12,4),%ZMM8 [1] |
0x66f1de VBROADCASTF64X4 0x40(%R8,%R12,4),%ZMM9 [1] |
0x66f1e6 VSUBPS %ZMM7,%ZMM1,%ZMM10 |
0x66f1ec VSUBPS %ZMM8,%ZMM2,%ZMM11 |
0x66f1f2 VSUBPS %ZMM9,%ZMM3,%ZMM12 |
0x66f1f8 VSUBPS %ZMM7,%ZMM4,%ZMM7 |
0x66f1fe VSUBPS %ZMM8,%ZMM5,%ZMM8 |
0x66f204 VSUBPS %ZMM9,%ZMM6,%ZMM9 |
0x66f20a VMULPS %ZMM10,%ZMM10,%ZMM10 |
0x66f210 VMULPS %ZMM11,%ZMM11,%ZMM11 |
0x66f216 VADDPS %ZMM11,%ZMM10,%ZMM10 |
0x66f21c VMULPS %ZMM12,%ZMM12,%ZMM11 |
0x66f222 VADDPS %ZMM10,%ZMM11,%ZMM10 |
0x66f228 VMULPS %ZMM7,%ZMM7,%ZMM7 |
0x66f22e VMULPS %ZMM8,%ZMM8,%ZMM8 |
0x66f234 VADDPS %ZMM8,%ZMM7,%ZMM7 |
0x66f23a VMULPS %ZMM9,%ZMM9,%ZMM8 |
0x66f240 VADDPS %ZMM7,%ZMM8,%ZMM7 |
0x66f246 VCMPPS $0x1,%ZMM0,%ZMM10,%K0 |
0x66f24d VCMPPS $0x1,%ZMM0,%ZMM7,%K1 |
0x66f254 MOVSXD %R13D,%R13 |
0x66f257 MOV (%RAX,%R15,8),%R12 [3] |
0x66f25b MOV %R12,(%RDX,%R13,8) [4] |
0x66f25f XOR %R12D,%R12D |
0x66f262 KORTESTW %K0,%K1 |
0x66f266 SETNE %R12B |
0x66f26a ADD %R12D,%R13D |
0x66f26d INC %R15 |
0x66f270 MOVSXD (%R10),%R12 [2] |
0x66f273 CMP %R12,%R15 |
0x66f276 JL 66f1c0 |
/home/eoseret/gromacs-2024.2/src/gromacs/nbnxm/simd_load_store_functions.h: 109 - 109 |
-------------------------------------------------------------------------------- |
109: return loadDuplicateHsimd(ptr + offset); |
/home/eoseret/gromacs-2024.2/src/gromacs/nbnxm/simd_prune_kernel.cpp: 144 - 204 |
-------------------------------------------------------------------------------- |
144: for (int cjind = ciEntry->cj_ind_start; cjind < ciEntry->cj_ind_end; cjind++) |
145: { |
146: /* j-cluster index */ |
147: int cj = cjOuter[cjind].cj; |
[...] |
154: ajx = aj * DIM; |
[...] |
203: cjInner[ncjInner] = cjOuter[cjind]; |
204: if (anyTrue(wco[0])) |
/home/eoseret/gromacs-2024.2/src/gromacs/simd/include/gromacs/simd/impl_x86_avx_512/impl_x86_avx_512_util_float.h: 370 - 370 |
-------------------------------------------------------------------------------- |
370: return { _mm512_castpd_ps(_mm512_broadcast_f64x4(_mm256_load_pd(reinterpret_cast<const double*>(m)))) }; |
/home/eoseret/gromacs-2024.2/src/gromacs/simd/include/gromacs/simd/impl_x86_avx_512/impl_x86_avx_512_simd_float.h: 181 - 393 |
-------------------------------------------------------------------------------- |
181: return { _mm512_add_ps(a.simdInternal_, b.simdInternal_) }; |
182: } |
183: |
184: static inline SimdFloat gmx_simdcall operator-(SimdFloat a, SimdFloat b) |
185: { |
186: return { _mm512_sub_ps(a.simdInternal_, b.simdInternal_) }; |
[...] |
197: return { _mm512_mul_ps(a.simdInternal_, b.simdInternal_) }; |
[...] |
367: return { _mm512_cmp_ps_mask(a.simdInternal_, b.simdInternal_, _CMP_LT_OQ) }; |
[...] |
393: return (avx512Mask2Int(a.simdInternal_) != 0); |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►100.00+ | .omp_outlined.#0x66eb40 | prunekerneldispatch.cpp:82 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | nonbonded_verlet_t::dispatchPr[...] | prunekerneldispatch.cpp:97 | libgromacs_mpi.so.9.0.0 |
○ | do_nb_verlet(t_forcerec*, inte[...] | sim_util.cpp:440 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:1936 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►89.08+ | .omp_outlined.#0x66eb40 | prunekerneldispatch.cpp:82 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | nonbonded_verlet_t::dispatchPr[...] | prunekerneldispatch.cpp:97 | libgromacs_mpi.so.9.0.0 |
○ | do_nb_verlet(t_forcerec*, inte[...] | sim_util.cpp:440 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:1936 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 | |
►10.92+ | .omp_outlined.#0x66eb40 | prunekerneldispatch.cpp:82 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | nonbonded_verlet_t::dispatchPr[...] | prunekerneldispatch.cpp:97 | libgromacs_mpi.so.9.0.0 |
○ | do_nb_verlet(t_forcerec*, inte[...] | sim_util.cpp:440 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:1974 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►76.13+ | .omp_outlined.#0x66eb40 | prunekerneldispatch.cpp:82 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | nonbonded_verlet_t::dispatchPr[...] | prunekerneldispatch.cpp:97 | libgromacs_mpi.so.9.0.0 |
○ | do_nb_verlet(t_forcerec*, inte[...] | sim_util.cpp:440 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:1936 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 | |
►23.87+ | .omp_outlined.#0x66eb40 | prunekerneldispatch.cpp:82 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | nonbonded_verlet_t::dispatchPr[...] | prunekerneldispatch.cpp:97 | libgromacs_mpi.so.9.0.0 |
○ | do_nb_verlet(t_forcerec*, inte[...] | sim_util.cpp:440 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:1974 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►46.49+ | .omp_outlined.#0x66eb40 | prunekerneldispatch.cpp:82 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | nonbonded_verlet_t::dispatchPr[...] | prunekerneldispatch.cpp:97 | libgromacs_mpi.so.9.0.0 |
○ | do_nb_verlet(t_forcerec*, inte[...] | sim_util.cpp:440 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:1936 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 | |
►29.41+ | .omp_outlined.#0x66eb40 | prunekerneldispatch.cpp:82 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | nonbonded_verlet_t::dispatchPr[...] | prunekerneldispatch.cpp:97 | libgromacs_mpi.so.9.0.0 |
○ | do_nb_verlet(t_forcerec*, inte[...] | sim_util.cpp:440 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:1974 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 | |
►7.40+ | .omp_outlined.#0x66eb40 | prunekerneldispatch.cpp:82 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | nonbonded_verlet_t::dispatchPr[...] | prunekerneldispatch.cpp:97 | libgromacs_mpi.so.9.0.0 |
○ | do_nb_verlet(t_forcerec*, inte[...] | sim_util.cpp:440 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:1936 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
►6.36+ | .omp_outlined.#0x66eb40 | prunekerneldispatch.cpp:82 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | nonbonded_verlet_t::dispatchPr[...] | prunekerneldispatch.cpp:97 | libgromacs_mpi.so.9.0.0 |
○ | do_nb_verlet(t_forcerec*, inte[...] | sim_util.cpp:440 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:1936 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
►5.17+ | .omp_outlined.#0x66eb40 | prunekerneldispatch.cpp:82 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | nonbonded_verlet_t::dispatchPr[...] | prunekerneldispatch.cpp:97 | libgromacs_mpi.so.9.0.0 |
○ | do_nb_verlet(t_forcerec*, inte[...] | sim_util.cpp:440 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:1974 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
►5.17+ | .omp_outlined.#0x66eb40 | prunekerneldispatch.cpp:82 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | nonbonded_verlet_t::dispatchPr[...] | prunekerneldispatch.cpp:97 | libgromacs_mpi.so.9.0.0 |
○ | do_nb_verlet(t_forcerec*, inte[...] | sim_util.cpp:440 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:1974 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►51.95+ | .omp_outlined.#0x66eb40 | prunekerneldispatch.cpp:82 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | nonbonded_verlet_t::dispatchPr[...] | prunekerneldispatch.cpp:97 | libgromacs_mpi.so.9.0.0 |
○ | do_nb_verlet(t_forcerec*, inte[...] | sim_util.cpp:440 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:1936 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 | |
►29.95+ | .omp_outlined.#0x66eb40 | prunekerneldispatch.cpp:82 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | nonbonded_verlet_t::dispatchPr[...] | prunekerneldispatch.cpp:97 | libgromacs_mpi.so.9.0.0 |
○ | do_nb_verlet(t_forcerec*, inte[...] | sim_util.cpp:440 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:1974 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 | |
►6.60+ | .omp_outlined.#0x66eb40 | prunekerneldispatch.cpp:82 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | nonbonded_verlet_t::dispatchPr[...] | prunekerneldispatch.cpp:97 | libgromacs_mpi.so.9.0.0 |
○ | do_nb_verlet(t_forcerec*, inte[...] | sim_util.cpp:440 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:1936 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
►4.42+ | .omp_outlined.#0x66eb40 | prunekerneldispatch.cpp:82 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | nonbonded_verlet_t::dispatchPr[...] | prunekerneldispatch.cpp:97 | libgromacs_mpi.so.9.0.0 |
○ | do_nb_verlet(t_forcerec*, inte[...] | sim_util.cpp:440 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:1974 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
►4.23+ | .omp_outlined.#0x66eb40 | prunekerneldispatch.cpp:82 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | nonbonded_verlet_t::dispatchPr[...] | prunekerneldispatch.cpp:97 | libgromacs_mpi.so.9.0.0 |
○ | do_nb_verlet(t_forcerec*, inte[...] | sim_util.cpp:440 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:1936 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
►2.84+ | .omp_outlined.#0x66eb40 | prunekerneldispatch.cpp:82 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | nonbonded_verlet_t::dispatchPr[...] | prunekerneldispatch.cpp:97 | libgromacs_mpi.so.9.0.0 |
○ | do_nb_verlet(t_forcerec*, inte[...] | sim_util.cpp:440 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:1974 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►46.05+ | .omp_outlined.#0x66eb40 | prunekerneldispatch.cpp:82 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | nonbonded_verlet_t::dispatchPr[...] | prunekerneldispatch.cpp:97 | libgromacs_mpi.so.9.0.0 |
○ | do_nb_verlet(t_forcerec*, inte[...] | sim_util.cpp:440 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:1936 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 | |
►34.27+ | .omp_outlined.#0x66eb40 | prunekerneldispatch.cpp:82 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | nonbonded_verlet_t::dispatchPr[...] | prunekerneldispatch.cpp:97 | libgromacs_mpi.so.9.0.0 |
○ | do_nb_verlet(t_forcerec*, inte[...] | sim_util.cpp:440 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:1974 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 | |
►8.84+ | .omp_outlined.#0x66eb40 | prunekerneldispatch.cpp:82 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | nonbonded_verlet_t::dispatchPr[...] | prunekerneldispatch.cpp:97 | libgromacs_mpi.so.9.0.0 |
○ | do_nb_verlet(t_forcerec*, inte[...] | sim_util.cpp:440 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:1936 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
►5.51+ | .omp_outlined.#0x66eb40 | prunekerneldispatch.cpp:82 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | nonbonded_verlet_t::dispatchPr[...] | prunekerneldispatch.cpp:97 | libgromacs_mpi.so.9.0.0 |
○ | do_nb_verlet(t_forcerec*, inte[...] | sim_util.cpp:440 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:1974 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
►3.16+ | .omp_outlined.#0x66eb40 | prunekerneldispatch.cpp:82 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | nonbonded_verlet_t::dispatchPr[...] | prunekerneldispatch.cpp:97 | libgromacs_mpi.so.9.0.0 |
○ | do_nb_verlet(t_forcerec*, inte[...] | sim_util.cpp:440 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:1936 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
►2.18+ | .omp_outlined.#0x66eb40 | prunekerneldispatch.cpp:82 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | nonbonded_verlet_t::dispatchPr[...] | prunekerneldispatch.cpp:97 | libgromacs_mpi.so.9.0.0 |
○ | do_nb_verlet(t_forcerec*, inte[...] | sim_util.cpp:440 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:1974 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►57.12+ | .omp_outlined.#0x66eb40 | prunekerneldispatch.cpp:82 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | nonbonded_verlet_t::dispatchPr[...] | prunekerneldispatch.cpp:97 | libgromacs_mpi.so.9.0.0 |
○ | do_nb_verlet(t_forcerec*, inte[...] | sim_util.cpp:440 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:1974 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 | |
►40.64+ | .omp_outlined.#0x66eb40 | prunekerneldispatch.cpp:82 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | nonbonded_verlet_t::dispatchPr[...] | prunekerneldispatch.cpp:97 | libgromacs_mpi.so.9.0.0 |
○ | do_nb_verlet(t_forcerec*, inte[...] | sim_util.cpp:440 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:1936 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 | |
►1.26+ | .omp_outlined.#0x66eb40 | prunekerneldispatch.cpp:82 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | nonbonded_verlet_t::dispatchPr[...] | prunekerneldispatch.cpp:97 | libgromacs_mpi.so.9.0.0 |
○ | do_nb_verlet(t_forcerec*, inte[...] | sim_util.cpp:440 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:1974 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►45.61+ | .omp_outlined.#0x66eb40 | prunekerneldispatch.cpp:82 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | nonbonded_verlet_t::dispatchPr[...] | prunekerneldispatch.cpp:97 | libgromacs_mpi.so.9.0.0 |
○ | do_nb_verlet(t_forcerec*, inte[...] | sim_util.cpp:440 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:1974 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 | |
►38.15+ | .omp_outlined.#0x66eb40 | prunekerneldispatch.cpp:82 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | nonbonded_verlet_t::dispatchPr[...] | prunekerneldispatch.cpp:97 | libgromacs_mpi.so.9.0.0 |
○ | do_nb_verlet(t_forcerec*, inte[...] | sim_util.cpp:440 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:1936 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 | |
►4.22+ | .omp_outlined.#0x66eb40 | prunekerneldispatch.cpp:82 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | nonbonded_verlet_t::dispatchPr[...] | prunekerneldispatch.cpp:97 | libgromacs_mpi.so.9.0.0 |
○ | do_nb_verlet(t_forcerec*, inte[...] | sim_util.cpp:440 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:1936 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
►3.78+ | .omp_outlined.#0x66eb40 | prunekerneldispatch.cpp:82 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | nonbonded_verlet_t::dispatchPr[...] | prunekerneldispatch.cpp:97 | libgromacs_mpi.so.9.0.0 |
○ | do_nb_verlet(t_forcerec*, inte[...] | sim_util.cpp:440 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:1974 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
►3.01+ | .omp_outlined.#0x66eb40 | prunekerneldispatch.cpp:82 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | nonbonded_verlet_t::dispatchPr[...] | prunekerneldispatch.cpp:97 | libgromacs_mpi.so.9.0.0 |
○ | do_nb_verlet(t_forcerec*, inte[...] | sim_util.cpp:440 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:1974 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
►2.42+ | .omp_outlined.#0x66eb40 | prunekerneldispatch.cpp:82 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | nonbonded_verlet_t::dispatchPr[...] | prunekerneldispatch.cpp:97 | libgromacs_mpi.so.9.0.0 |
○ | do_nb_verlet(t_forcerec*, inte[...] | sim_util.cpp:440 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:1936 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
►1.45+ | .omp_outlined.#0x66eb40 | prunekerneldispatch.cpp:82 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | nonbonded_verlet_t::dispatchPr[...] | prunekerneldispatch.cpp:97 | libgromacs_mpi.so.9.0.0 |
○ | do_nb_verlet(t_forcerec*, inte[...] | sim_util.cpp:440 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:1936 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
►1.36+ | .omp_outlined.#0x66eb40 | prunekerneldispatch.cpp:82 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | nonbonded_verlet_t::dispatchPr[...] | prunekerneldispatch.cpp:97 | libgromacs_mpi.so.9.0.0 |
○ | do_nb_verlet(t_forcerec*, inte[...] | sim_util.cpp:440 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:1974 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►63.33+ | .omp_outlined.#0x66eb40 | prunekerneldispatch.cpp:82 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | nonbonded_verlet_t::dispatchPr[...] | prunekerneldispatch.cpp:97 | libgromacs_mpi.so.9.0.0 |
○ | do_nb_verlet(t_forcerec*, inte[...] | sim_util.cpp:440 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:1974 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 | |
►36.67+ | .omp_outlined.#0x66eb40 | prunekerneldispatch.cpp:82 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | nonbonded_verlet_t::dispatchPr[...] | prunekerneldispatch.cpp:97 | libgromacs_mpi.so.9.0.0 |
○ | do_nb_verlet(t_forcerec*, inte[...] | sim_util.cpp:440 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:1936 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 |
Path / |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.05 |
CQA speedup if FP arith vectorized | 1.00 |
CQA speedup if fully vectorized | 1.00 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.05 |
Bottlenecks | P11, |
Function | void nbnxmSimdPruneKernel<(KernelLayout)1>(NbnxnPairlistCpu*, nbnxn_atomdata_t const&, gmx::ArrayRef |
Source | simd_load_store_functions.h:109-109,simd_prune_kernel.cpp:144-147,simd_prune_kernel.cpp:154-154,simd_prune_kernel.cpp:203-204,impl_x86_avx_512_util_float.h:370-370,impl_x86_avx_512_simd_float.h:181-186,impl_x86_avx_512_simd_float.h:197-197,impl_x86_avx_512_simd_float.h:367-367,impl_x86_avx_512_simd_float.h:393-393 |
Source loop unroll info | not unrolled or unrolled with no peel/tail loop |
Source loop unroll confidence level | max |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 11.50 |
CQA cycles if no scalar integer | 11.00 |
CQA cycles if FP arith vectorized | 11.50 |
CQA cycles if fully vectorized | 11.50 |
Front-end cycles | 6.00 |
P0 cycles | 2.50 |
P1 cycles | 2.50 |
P2 cycles | 2.25 |
P3 cycles | 2.25 |
P4 cycles | 0.50 |
P5 cycles | 2.33 |
P6 cycles | 2.33 |
P7 cycles | 2.33 |
P8 cycles | 9.00 |
P9 cycles | 6.50 |
P10 cycles | 11.00 |
P11 cycles | 11.50 |
P12 cycles | 0.50 |
P13 cycles | 0.50 |
DIV/SQRT cycles | 0.00 |
Inter-iter dependencies cycles | 2 |
FE+BE cycles (UFS) | NA |
Stall cycles (UFS) | NA |
Nb insns | 36.00 |
Nb uops | 36.00 |
Nb loads | 6.00 |
Nb stores | 1.00 |
Nb stack references | 0.00 |
FLOP/cycle | 22.26 |
Nb FLOP add-sub | 160.00 |
Nb FLOP mul | 96.00 |
Nb FLOP fma | 0.00 |
Nb FLOP div | 0.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 10.43 |
Bytes prefetched | 0.00 |
Bytes loaded | 112.00 |
Bytes stored | 8.00 |
Stride 0 | 1.00 |
Stride 1 | 1.00 |
Stride n | 0.00 |
Stride unknown | 2.00 |
Stride indirect | 0.00 |
Vectorization ratio all | 95.45 |
Vectorization ratio load | 100.00 |
Vectorization ratio store | 0.00 |
Vectorization ratio mul | 100.00 |
Vectorization ratio add_sub | 100.00 |
Vectorization ratio fma | NA |
Vectorization ratio div_sqrt | NA |
Vectorization ratio other | 100.00 |
Vector-efficiency ratio all | 89.20 |
Vector-efficiency ratio load | 50.00 |
Vector-efficiency ratio store | 12.50 |
Vector-efficiency ratio mul | 100.00 |
Vector-efficiency ratio add_sub | 100.00 |
Vector-efficiency ratio fma | NA |
Vector-efficiency ratio div_sqrt | NA |
Vector-efficiency ratio other | 70.00 |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.05 |
CQA speedup if FP arith vectorized | 1.00 |
CQA speedup if fully vectorized | 1.00 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.05 |
Bottlenecks | P11, |
Function | void nbnxmSimdPruneKernel<(KernelLayout)1>(NbnxnPairlistCpu*, nbnxn_atomdata_t const&, gmx::ArrayRef |
Source | simd_load_store_functions.h:109-109,simd_prune_kernel.cpp:144-147,simd_prune_kernel.cpp:154-154,simd_prune_kernel.cpp:203-204,impl_x86_avx_512_util_float.h:370-370,impl_x86_avx_512_simd_float.h:181-186,impl_x86_avx_512_simd_float.h:197-197,impl_x86_avx_512_simd_float.h:367-367,impl_x86_avx_512_simd_float.h:393-393 |
Source loop unroll info | not unrolled or unrolled with no peel/tail loop |
Source loop unroll confidence level | max |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 11.50 |
CQA cycles if no scalar integer | 11.00 |
CQA cycles if FP arith vectorized | 11.50 |
CQA cycles if fully vectorized | 11.50 |
Front-end cycles | 6.00 |
P0 cycles | 2.50 |
P1 cycles | 2.50 |
P2 cycles | 2.25 |
P3 cycles | 2.25 |
P4 cycles | 0.50 |
P5 cycles | 2.33 |
P6 cycles | 2.33 |
P7 cycles | 2.33 |
P8 cycles | 9.00 |
P9 cycles | 6.50 |
P10 cycles | 11.00 |
P11 cycles | 11.50 |
P12 cycles | 0.50 |
P13 cycles | 0.50 |
DIV/SQRT cycles | 0.00 |
Inter-iter dependencies cycles | 2 |
FE+BE cycles (UFS) | NA |
Stall cycles (UFS) | NA |
Nb insns | 36.00 |
Nb uops | 36.00 |
Nb loads | 6.00 |
Nb stores | 1.00 |
Nb stack references | 0.00 |
FLOP/cycle | 22.26 |
Nb FLOP add-sub | 160.00 |
Nb FLOP mul | 96.00 |
Nb FLOP fma | 0.00 |
Nb FLOP div | 0.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 10.43 |
Bytes prefetched | 0.00 |
Bytes loaded | 112.00 |
Bytes stored | 8.00 |
Stride 0 | 1.00 |
Stride 1 | 1.00 |
Stride n | 0.00 |
Stride unknown | 2.00 |
Stride indirect | 0.00 |
Vectorization ratio all | 95.45 |
Vectorization ratio load | 100.00 |
Vectorization ratio store | 0.00 |
Vectorization ratio mul | 100.00 |
Vectorization ratio add_sub | 100.00 |
Vectorization ratio fma | NA |
Vectorization ratio div_sqrt | NA |
Vectorization ratio other | 100.00 |
Vector-efficiency ratio all | 89.20 |
Vector-efficiency ratio load | 50.00 |
Vector-efficiency ratio store | 12.50 |
Vector-efficiency ratio mul | 100.00 |
Vector-efficiency ratio add_sub | 100.00 |
Vector-efficiency ratio fma | NA |
Vector-efficiency ratio div_sqrt | NA |
Vector-efficiency ratio other | 70.00 |
Path / |
Function | void nbnxmSimdPruneKernel<(KernelLayout)1>(NbnxnPairlistCpu*, nbnxn_atomdata_t const&, gmx::ArrayRef |
Source file and lines | simd_prune_kernel.cpp:144-204 |
Module | libgromacs_mpi.so.9.0.0 |
nb instructions | 36 |
nb uops | 36 |
loop length | 188 |
used x86 registers | 7 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 13 |
nb stack references | 0 |
ADD-SUB / MUL ratio | 1.67 |
micro-operation queue | 6.00 cycles |
front end | 6.00 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 2.50 | 2.50 | 2.25 | 2.25 | 0.50 | 2.33 | 2.33 | 2.33 | 5.00 | 4.50 | 6.50 | 6.00 | 0.50 | 0.50 |
cycles | 2.50 | 2.50 | 2.25 | 2.25 | 0.50 | 2.33 | 2.33 | 2.33 | 9.00 | 6.50 | 11.00 | 11.50 | 0.50 | 0.50 |
Cycles executing div or sqrt instructions | NA |
Longest recurrence chain latency (RecMII) | 2.00 |
Front-end | 6.00 |
Dispatch | 11.50 |
Data deps. | 2.00 |
Overall L1 | 11.50 |
all | 0% |
load | NA (no load vectorizable/vectorized instructions) |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 100% |
load | 100% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 100% |
add-sub | 100% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 100% |
all | 95% |
load | 100% |
store | 0% |
mul | 100% |
add-sub | 100% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 100% |
all | 12% |
load | NA (no load vectorizable/vectorized instructions) |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 92% |
load | 50% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 100% |
add-sub | 100% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 70% |
all | 89% |
load | 50% |
store | 12% |
mul | 100% |
add-sub | 100% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 70% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput | Vectorization |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
MOVSXD (%RAX,%R15,8),%R12 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
SAL $0x3,%R12 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
LEA (%R12,%R12,2),%R12 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VBROADCASTF64X4 (%R8,%R12,4),%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 4 | 1 | vect (50.0%) |
MOVSXD %R12D,%R12 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VBROADCASTF64X4 0x20(%R8,%R12,4),%ZMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 4 | 1 | vect (50.0%) |
VBROADCASTF64X4 0x40(%R8,%R12,4),%ZMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 4 | 1 | vect (50.0%) |
VSUBPS %ZMM7,%ZMM1,%ZMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VSUBPS %ZMM8,%ZMM2,%ZMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VSUBPS %ZMM9,%ZMM3,%ZMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VSUBPS %ZMM7,%ZMM4,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VSUBPS %ZMM8,%ZMM5,%ZMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VSUBPS %ZMM9,%ZMM6,%ZMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM10,%ZMM10,%ZMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM11,%ZMM11,%ZMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VADDPS %ZMM11,%ZMM10,%ZMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM12,%ZMM12,%ZMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VADDPS %ZMM10,%ZMM11,%ZMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM7,%ZMM7,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM8,%ZMM8,%ZMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VADDPS %ZMM8,%ZMM7,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM9,%ZMM9,%ZMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VADDPS %ZMM7,%ZMM8,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VCMPPS $0x1,%ZMM0,%ZMM10,%K0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 1 | vect (100.0%) |
VCMPPS $0x1,%ZMM0,%ZMM7,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 1 | vect (100.0%) |
MOVSXD %R13D,%R13 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV (%RAX,%R15,8),%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV %R12,(%RDX,%R13,8) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
XOR %R12D,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
KORTESTW %K0,%K1 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0.50 | 6 | 1 | N/A |
SETNE %R12B | 1 | 0.50 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
ADD %R12D,%R13D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
INC %R15 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOVSXD (%R10),%R12 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
CMP %R12,%R15 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
JL 66f1c0 <_Z20nbnxmSimdPruneKernelIL12KernelLayout1EEvP16NbnxnPairlistCpuRK16nbnxn_atomdata_tN3gmx8ArrayRefIKNS6_11BasicVectorIfEEEEf+0x250> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
Function | void nbnxmSimdPruneKernel<(KernelLayout)1>(NbnxnPairlistCpu*, nbnxn_atomdata_t const&, gmx::ArrayRef |
Source file and lines | simd_prune_kernel.cpp:144-204 |
Module | libgromacs_mpi.so.9.0.0 |
nb instructions | 36 |
nb uops | 36 |
loop length | 188 |
used x86 registers | 7 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 13 |
nb stack references | 0 |
ADD-SUB / MUL ratio | 1.67 |
micro-operation queue | 6.00 cycles |
front end | 6.00 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 2.50 | 2.50 | 2.25 | 2.25 | 0.50 | 2.33 | 2.33 | 2.33 | 5.00 | 4.50 | 6.50 | 6.00 | 0.50 | 0.50 |
cycles | 2.50 | 2.50 | 2.25 | 2.25 | 0.50 | 2.33 | 2.33 | 2.33 | 9.00 | 6.50 | 11.00 | 11.50 | 0.50 | 0.50 |
Cycles executing div or sqrt instructions | NA |
Longest recurrence chain latency (RecMII) | 2.00 |
Front-end | 6.00 |
Dispatch | 11.50 |
Data deps. | 2.00 |
Overall L1 | 11.50 |
all | 0% |
load | NA (no load vectorizable/vectorized instructions) |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 100% |
load | 100% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 100% |
add-sub | 100% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 100% |
all | 95% |
load | 100% |
store | 0% |
mul | 100% |
add-sub | 100% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 100% |
all | 12% |
load | NA (no load vectorizable/vectorized instructions) |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 92% |
load | 50% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 100% |
add-sub | 100% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 70% |
all | 89% |
load | 50% |
store | 12% |
mul | 100% |
add-sub | 100% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 70% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput | Vectorization |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
MOVSXD (%RAX,%R15,8),%R12 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
SAL $0x3,%R12 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
LEA (%R12,%R12,2),%R12 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VBROADCASTF64X4 (%R8,%R12,4),%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 4 | 1 | vect (50.0%) |
MOVSXD %R12D,%R12 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VBROADCASTF64X4 0x20(%R8,%R12,4),%ZMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 4 | 1 | vect (50.0%) |
VBROADCASTF64X4 0x40(%R8,%R12,4),%ZMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 4 | 1 | vect (50.0%) |
VSUBPS %ZMM7,%ZMM1,%ZMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VSUBPS %ZMM8,%ZMM2,%ZMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VSUBPS %ZMM9,%ZMM3,%ZMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VSUBPS %ZMM7,%ZMM4,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VSUBPS %ZMM8,%ZMM5,%ZMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VSUBPS %ZMM9,%ZMM6,%ZMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM10,%ZMM10,%ZMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM11,%ZMM11,%ZMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VADDPS %ZMM11,%ZMM10,%ZMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM12,%ZMM12,%ZMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VADDPS %ZMM10,%ZMM11,%ZMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM7,%ZMM7,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM8,%ZMM8,%ZMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VADDPS %ZMM8,%ZMM7,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM9,%ZMM9,%ZMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VADDPS %ZMM7,%ZMM8,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VCMPPS $0x1,%ZMM0,%ZMM10,%K0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 1 | vect (100.0%) |
VCMPPS $0x1,%ZMM0,%ZMM7,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 1 | vect (100.0%) |
MOVSXD %R13D,%R13 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV (%RAX,%R15,8),%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV %R12,(%RDX,%R13,8) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
XOR %R12D,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
KORTESTW %K0,%K1 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0.50 | 6 | 1 | N/A |
SETNE %R12B | 1 | 0.50 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
ADD %R12D,%R13D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
INC %R15 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOVSXD (%R10),%R12 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
CMP %R12,%R15 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
JL 66f1c0 <_Z20nbnxmSimdPruneKernelIL12KernelLayout1EEvP16NbnxnPairlistCpuRK16nbnxn_atomdata_tN3gmx8ArrayRefIKNS6_11BasicVectorIfEEEEf+0x250> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
Run 1x1 | Number processes: 1Number processes per node: 1OMP_NUM_THREADS: 1 |
---|---|
Run 2x1 | Number processes: 2Number processes per node: 2OMP_NUM_THREADS: 1 |
Run 4x1 | Number processes: 4Number processes per node: 4OMP_NUM_THREADS: 1 |
Run 8x1 | Number processes: 8Number processes per node: 8OMP_NUM_THREADS: 1 |
Run 16x1 | Number processes: 16Number processes per node: 16OMP_NUM_THREADS: 1 |
Run 32x1 | Number processes: 32Number processes per node: 32OMP_NUM_THREADS: 1 |
Run 64x1 | Number processes: 64Number processes per node: 64OMP_NUM_THREADS: 1 |
Run 128x1 | Number processes: 128Number processes per node: 128OMP_NUM_THREADS: 1 |
Run 192x1 | Number processes: 192Number nodes: 1Number processes per node: 192Run Command: <executable> mdrun -s ion_channel.tpr -nsteps 10000 -pin on -deffnm aoccMPI Command: mpirun -genv I_MPI_FABRICS=shm -n <number_processes>Dataset: Run Directory: .OMP_NUM_THREADS: 1 |
(1x1) Efficiency | (1x1) Potential Speed-Up (%) | (2x1) Efficiency | (2x1) Potential Speed-Up (%) | (4x1) Efficiency | (4x1) Potential Speed-Up (%) | (8x1) Efficiency | (8x1) Potential Speed-Up (%) | (16x1) Efficiency | (16x1) Potential Speed-Up (%) | (32x1) Efficiency | (32x1) Potential Speed-Up (%) | (64x1) Efficiency | (64x1) Potential Speed-Up (%) | (128x1) Efficiency | (128x1) Potential Speed-Up (%) | (192x1) Efficiency | (192x1) Potential Speed-Up (%) |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1 | 0 | 0.95 | 0.05 | 0.88 | 0.13 | 0.69 | 0.32 | 0.83 | 0.13 | 0.77 | 0.16 | 0.65 | 0.19 | 0.86 | 0.07 | 0.83 | 0.07 |
Run | Number of threads | Efficiency (ideal is 1) | Speedup | Ideal Speedup | Time (s) | Coverage (%) |
---|---|---|---|---|---|---|
1x1 | 1 | 1 | 1 | 1 | 9.0149984359741 | 1.0814478397369 |
2x1 | 2 | 0.95 | 1.9 | 2 | 4.864999294281 | 1.0457406044006 |
4x1 | 4 | 0.88 | 3.5 | 4 | 2.6199998855591 | 1.0327287912369 |
8x1 | 8 | 0.69 | 5.56 | 8 | 1.7049999237061 | 1.0482286214828 |
16x1 | 16 | 0.83 | 13.36 | 16 | 0.83499997854233 | 0.81601011753082 |
32x1 | 20 | 0.77 | 24.6 | 32 | 0.67499995231628 | 0.71316182613373 |
64x1 | 40 | 0.65 | 41.47 | 64 | 0.42499995231628 | 0.54695230722427 |
128x1 | 84 | 0.86 | 110.28 | 128 | 0.17499998211861 | 0.48099100589752 |
192x1 | 128 | 0.83 | 158.65 | 192 | 0.1599999666214 | 0.42184561491013 |