Loop Id: 17097 | Module: libgromacs_mpi.so.9.0.0 | Source: pairs.cpp:873-911 [...] | Coverage: 0.17% |
---|
Loop Id: 17097 | Module: libgromacs_mpi.so.9.0.0 | Source: pairs.cpp:873-911 [...] | Coverage: 0.17% |
---|
0xbec210 VMOVDQA64 0xc0(%RSP),%ZMM0 |
0xbec218 VPMULLD %ZMM10,%ZMM0,%ZMM1 |
0xbec21e KXNORW %K0,%K0,%K1 |
0xbec222 VXORPS %XMM2,%XMM2,%XMM2 |
0xbec226 VGATHERDPS (%R13,%ZMM1,4),%ZMM2{%K1} |
0xbec22e KXNORW %K0,%K0,%K1 |
0xbec232 VXORPS %XMM19,%XMM19,%XMM19 |
0xbec238 MOV 0x88(%RSP),%RCX |
0xbec240 VGATHERDPS (%RCX,%ZMM1,4),%ZMM19{%K1} |
0xbec247 KXNORW %K0,%K0,%K1 |
0xbec24b VXORPS %XMM20,%XMM20,%XMM20 |
0xbec251 VGATHERDPS (%RDX,%ZMM1,4),%ZMM20{%K1} |
0xbec258 VMOVDQA64 0x100(%RSP),%ZMM18 |
0xbec260 VPMULLD %ZMM10,%ZMM18,%ZMM1 |
0xbec266 KXNORW %K0,%K0,%K1 |
0xbec26a VXORPS %XMM21,%XMM21,%XMM21 |
0xbec270 VGATHERDPS (%R13,%ZMM1,4),%ZMM21{%K1} |
0xbec278 KXNORW %K0,%K0,%K1 |
0xbec27c VXORPS %XMM22,%XMM22,%XMM22 |
0xbec282 VGATHERDPS (%RCX,%ZMM1,4),%ZMM22{%K1} |
0xbec289 KXNORW %K0,%K0,%K1 |
0xbec28d VXORPS %XMM23,%XMM23,%XMM23 |
0xbec293 VGATHERDPS (%RDX,%ZMM1,4),%ZMM23{%K1} |
0xbec29a VSUBPS %ZMM21,%ZMM2,%ZMM1 |
0xbec2a0 VSUBPS %ZMM22,%ZMM19,%ZMM2 |
0xbec2a6 VMULPS 0x2c0(%RSP),%ZMM12,%ZMM19 |
0xbec2ae VSUBPS %ZMM23,%ZMM20,%ZMM20 |
0xbec2b4 VMULPS 0x340(%RSP),%ZMM20,%ZMM21 |
0xbec2bc VRNDSCALEPS $0,%ZMM21,%ZMM21 |
0xbec2c3 VMULPS 0x180(%RSP),%ZMM21,%ZMM22 |
0xbec2cb VSUBPS %ZMM22,%ZMM1,%ZMM1 |
0xbec2d1 VMULPS 0x140(%RSP),%ZMM21,%ZMM22 |
0xbec2d9 VSUBPS %ZMM22,%ZMM2,%ZMM2 |
0xbec2df VMULPS 0x200(%RSP),%ZMM21,%ZMM21 |
0xbec2e7 VMULPS %ZMM5,%ZMM2,%ZMM22 |
0xbec2ed VRNDSCALEPS $0,%ZMM22,%ZMM22 |
0xbec2f4 VSUBPS %ZMM21,%ZMM20,%ZMM20 |
0xbec2fa VMULPS %ZMM6,%ZMM22,%ZMM21 |
0xbec300 VSUBPS %ZMM21,%ZMM1,%ZMM1 |
0xbec306 VMULPS %ZMM7,%ZMM22,%ZMM21 |
0xbec30c VSUBPS %ZMM21,%ZMM2,%ZMM2 |
0xbec312 VMULPS %ZMM8,%ZMM1,%ZMM21 |
0xbec318 VRNDSCALEPS $0,%ZMM21,%ZMM21 |
0xbec31f VMULPS %ZMM9,%ZMM21,%ZMM21 |
0xbec325 VSUBPS %ZMM21,%ZMM1,%ZMM1 |
0xbec32b VMULPS %ZMM1,%ZMM1,%ZMM21 |
0xbec331 VMULPS %ZMM2,%ZMM2,%ZMM22 |
0xbec337 VADDPS %ZMM22,%ZMM21,%ZMM21 |
0xbec33d VMULPS %ZMM20,%ZMM20,%ZMM22 |
0xbec343 VADDPS %ZMM22,%ZMM21,%ZMM21 |
0xbec349 VRSQRT14PS %ZMM21,%ZMM22 |
0xbec34f VMULPS %ZMM21,%ZMM22,%ZMM21 |
0xbec355 VMULPS %ZMM13,%ZMM22,%ZMM23 |
0xbec35b VFMADD213PS %ZMM14,%ZMM22,%ZMM21 |
0xbec361 VMULPS %ZMM21,%ZMM23,%ZMM21 |
0xbec367 VMULPS %ZMM21,%ZMM21,%ZMM22 |
0xbec36d VMULPS %ZMM22,%ZMM22,%ZMM23 |
0xbec373 VMULPS %ZMM23,%ZMM22,%ZMM23 |
0xbec379 VMOVAPS 0x240(%RSP),%ZMM3 |
0xbec381 VMULPS 0x300(%RSP),%ZMM3,%ZMM24 |
0xbec389 VMULPS %ZMM21,%ZMM24,%ZMM21 |
0xbec38f VMULPS 0x280(%RSP),%ZMM15,%ZMM24 |
0xbec397 VFMADD231PS %ZMM19,%ZMM23,%ZMM24 |
0xbec39d VFMADD213PS %ZMM21,%ZMM23,%ZMM24 |
0xbec3a3 VMULPS %ZMM24,%ZMM22,%ZMM19 |
0xbec3a9 VMULPS %ZMM19,%ZMM1,%ZMM1 |
0xbec3af VMULPS %ZMM19,%ZMM2,%ZMM2 |
0xbec3b5 VMULPS %ZMM19,%ZMM20,%ZMM19 |
0xbec3bb VPSLLD $0x2,%ZMM0,%ZMM0 |
0xbec3c2 VPERMD %ZMM0,%ZMM16,%ZMM3 |
0xbec3c8 VPERMD %ZMM0,%ZMM17,%ZMM0 |
0xbec3ce VUNPCKLPS %ZMM19,%ZMM1,%ZMM20 |
0xbec3d4 VUNPCKHPS %ZMM19,%ZMM1,%ZMM1 |
0xbec3da VUNPCKLPS %ZMM11,%ZMM2,%ZMM19 |
0xbec3e0 VUNPCKHPS %ZMM11,%ZMM2,%ZMM2 |
0xbec3e6 VUNPCKLPS %ZMM19,%ZMM20,%ZMM30 |
0xbec3ec VUNPCKHPS %ZMM19,%ZMM20,%ZMM21 |
0xbec3f2 VUNPCKLPS %ZMM2,%ZMM1,%ZMM20 |
0xbec3f8 VUNPCKHPS %ZMM2,%ZMM1,%ZMM19 |
0xbec3fe VEXTRACTF32X4 $0x1,%YMM30,%XMM31 |
0xbec405 VEXTRACTF32X4 $0x2,%ZMM30,%XMM28 |
0xbec40c VEXTRACTF32X4 $0x3,%ZMM30,%XMM26 |
0xbec413 VEXTRACTF32X4 $0x1,%YMM21,%XMM24 |
0xbec41a VEXTRACTF32X4 $0x2,%ZMM21,%XMM22 |
0xbec421 VPMOVSXDQ %YMM0,%ZMM0 |
0xbec427 VMOVQ %XMM0,%R8 |
0xbec42c VADDPS (%R15,%R8,4),%XMM30,%XMM1 |
0xbec433 VMOVAPS %XMM1,(%R15,%R8,4) |
0xbec439 VPEXTRQ $0x1,%XMM0,%R8 |
0xbec43f VADDPS (%R15,%R8,4),%XMM31,%XMM1 |
0xbec446 VMOVAPS %XMM1,(%R15,%R8,4) |
0xbec44c VEXTRACTI128 $0x1,%YMM0,%XMM1 |
0xbec452 VMOVQ %XMM1,%R8 |
0xbec457 VADDPS (%R15,%R8,4),%XMM28,%XMM2 |
0xbec45e VMOVAPS %XMM2,(%R15,%R8,4) |
0xbec464 VPEXTRQ $0x1,%XMM1,%R8 |
0xbec46a VADDPS (%R15,%R8,4),%XMM26,%XMM1 |
0xbec471 VMOVAPS %XMM1,(%R15,%R8,4) |
0xbec477 VEXTRACTI32X4 $0x2,%ZMM0,%XMM1 |
0xbec47e VMOVQ %XMM1,%R8 |
0xbec483 VADDPS (%R15,%R8,4),%XMM21,%XMM2 |
0xbec48a VMOVAPS %XMM2,(%R15,%R8,4) |
0xbec490 VPEXTRQ $0x1,%XMM1,%R8 |
0xbec496 VADDPS (%R15,%R8,4),%XMM24,%XMM1 |
0xbec49d VMOVAPS %XMM1,(%R15,%R8,4) |
0xbec4a3 VEXTRACTI32X4 $0x3,%ZMM0,%XMM0 |
0xbec4aa VMOVQ %XMM0,%R8 |
0xbec4af VADDPS (%R15,%R8,4),%XMM22,%XMM1 |
0xbec4b6 VMOVAPS %XMM1,(%R15,%R8,4) |
0xbec4bc VPEXTRQ $0x1,%XMM0,%R8 |
0xbec4c2 VEXTRACTF32X4 $0x3,%ZMM21,%XMM1 |
0xbec4c9 VADDPS (%R15,%R8,4),%XMM1,%XMM0 |
0xbec4cf VMOVAPS %XMM0,(%R15,%R8,4) |
0xbec4d5 VEXTRACTF32X4 $0x1,%YMM20,%XMM0 |
0xbec4dc VEXTRACTF32X4 $0x2,%ZMM20,%XMM29 |
0xbec4e3 VEXTRACTF32X4 $0x3,%ZMM20,%XMM27 |
0xbec4ea VEXTRACTF32X4 $0x1,%YMM19,%XMM25 |
0xbec4f1 VEXTRACTF32X4 $0x2,%ZMM19,%XMM23 |
0xbec4f8 VPMOVSXDQ %YMM3,%ZMM2 |
0xbec4fe VMOVQ %XMM2,%R8 |
0xbec503 VADDPS (%R15,%R8,4),%XMM20,%XMM3 |
0xbec50a VMOVAPS %XMM3,(%R15,%R8,4) |
0xbec510 VPEXTRQ $0x1,%XMM2,%R8 |
0xbec516 VADDPS (%R15,%R8,4),%XMM0,%XMM3 |
0xbec51c VMOVAPS %XMM3,(%R15,%R8,4) |
0xbec522 VEXTRACTI128 $0x1,%YMM2,%XMM3 |
0xbec528 VMOVQ %XMM3,%R8 |
0xbec52d VADDPS (%R15,%R8,4),%XMM29,%XMM4 |
0xbec534 VMOVAPS %XMM4,(%R15,%R8,4) |
0xbec53a VPEXTRQ $0x1,%XMM3,%R8 |
0xbec540 VADDPS (%R15,%R8,4),%XMM27,%XMM3 |
0xbec547 VMOVAPS %XMM3,(%R15,%R8,4) |
0xbec54d VEXTRACTI32X4 $0x2,%ZMM2,%XMM3 |
0xbec554 VMOVQ %XMM3,%R8 |
0xbec559 VADDPS (%R15,%R8,4),%XMM19,%XMM4 |
0xbec560 VMOVAPS %XMM4,(%R15,%R8,4) |
0xbec566 VPEXTRQ $0x1,%XMM3,%R8 |
0xbec56c VADDPS (%R15,%R8,4),%XMM25,%XMM3 |
0xbec573 VMOVAPS %XMM3,(%R15,%R8,4) |
0xbec579 VEXTRACTI32X4 $0x3,%ZMM2,%XMM2 |
0xbec580 VMOVQ %XMM2,%R8 |
0xbec585 VADDPS (%R15,%R8,4),%XMM23,%XMM3 |
0xbec58c VMOVAPS %XMM3,(%R15,%R8,4) |
0xbec592 VPEXTRQ $0x1,%XMM2,%R8 |
0xbec598 VEXTRACTF32X4 $0x3,%ZMM19,%XMM2 |
0xbec59f VADDPS (%R15,%R8,4),%XMM2,%XMM3 |
0xbec5a5 VMOVAPS %XMM3,(%R15,%R8,4) |
0xbec5ab VPSLLD $0x2,%ZMM18,%ZMM3 |
0xbec5b2 VPERMD %ZMM3,%ZMM17,%ZMM4 |
0xbec5b8 VPMOVSXDQ %YMM4,%ZMM4 |
0xbec5be VMOVQ %XMM4,%R8 |
0xbec5c3 VMOVAPS (%R15,%R8,4),%XMM18 |
0xbec5ca VSUBPS %XMM30,%XMM18,%XMM18 |
0xbec5d0 VMOVAPS %XMM18,(%R15,%R8,4) |
0xbec5d7 VPEXTRQ $0x1,%XMM4,%R8 |
0xbec5dd VMOVAPS (%R15,%R8,4),%XMM18 |
0xbec5e4 VSUBPS %XMM31,%XMM18,%XMM18 |
0xbec5ea VMOVAPS %XMM18,(%R15,%R8,4) |
0xbec5f1 VEXTRACTI32X4 $0x1,%YMM4,%XMM18 |
0xbec5f8 VMOVQ %XMM18,%R8 |
0xbec5fe VMOVAPS (%R15,%R8,4),%XMM30 |
0xbec605 VSUBPS %XMM28,%XMM30,%XMM28 |
0xbec60b VMOVAPS %XMM28,(%R15,%R8,4) |
0xbec612 VPEXTRQ $0x1,%XMM18,%R8 |
0xbec619 VMOVAPS (%R15,%R8,4),%XMM18 |
0xbec620 VSUBPS %XMM26,%XMM18,%XMM18 |
0xbec626 VMOVAPS %XMM18,(%R15,%R8,4) |
0xbec62d VEXTRACTI32X4 $0x2,%ZMM4,%XMM18 |
0xbec634 VMOVQ %XMM18,%R8 |
0xbec63a VMOVAPS (%R15,%R8,4),%XMM26 |
0xbec641 VSUBPS %XMM21,%XMM26,%XMM21 |
0xbec647 VMOVAPS %XMM21,(%R15,%R8,4) |
0xbec64e VPEXTRQ $0x1,%XMM18,%R8 |
0xbec655 VMOVAPS (%R15,%R8,4),%XMM18 |
0xbec65c VSUBPS %XMM24,%XMM18,%XMM18 |
0xbec662 VMOVAPS %XMM18,(%R15,%R8,4) |
0xbec669 VEXTRACTI32X4 $0x3,%ZMM4,%XMM4 |
0xbec670 VMOVQ %XMM4,%R8 |
0xbec675 VMOVAPS (%R15,%R8,4),%XMM18 |
0xbec67c VSUBPS %XMM22,%XMM18,%XMM18 |
0xbec682 VMOVAPS %XMM18,(%R15,%R8,4) |
0xbec689 VPEXTRQ $0x1,%XMM4,%R8 |
0xbec68f VMOVAPS (%R15,%R8,4),%XMM4 |
0xbec695 VSUBPS %XMM1,%XMM4,%XMM1 |
0xbec699 VPERMD %ZMM3,%ZMM16,%ZMM3 |
0xbec69f VMOVAPS %XMM1,(%R15,%R8,4) |
0xbec6a5 VPMOVSXDQ %YMM3,%ZMM1 |
0xbec6ab VMOVQ %XMM1,%R8 |
0xbec6b0 VMOVAPS (%R15,%R8,4),%XMM3 |
0xbec6b6 VSUBPS %XMM20,%XMM3,%XMM3 |
0xbec6bc VMOVAPS %XMM3,(%R15,%R8,4) |
0xbec6c2 VPEXTRQ $0x1,%XMM1,%R8 |
0xbec6c8 VMOVAPS (%R15,%R8,4),%XMM3 |
0xbec6ce VSUBPS %XMM0,%XMM3,%XMM0 |
0xbec6d2 VMOVAPS %XMM0,(%R15,%R8,4) |
0xbec6d8 VEXTRACTI128 $0x1,%YMM1,%XMM0 |
0xbec6de VMOVQ %XMM0,%R8 |
0xbec6e3 VMOVAPS (%R15,%R8,4),%XMM3 |
0xbec6e9 VSUBPS %XMM29,%XMM3,%XMM3 |
0xbec6ef VMOVAPS %XMM3,(%R15,%R8,4) |
0xbec6f5 VPEXTRQ $0x1,%XMM0,%R8 |
0xbec6fb VMOVAPS (%R15,%R8,4),%XMM0 |
0xbec701 VSUBPS %XMM27,%XMM0,%XMM0 |
0xbec707 VMOVAPS %XMM0,(%R15,%R8,4) |
0xbec70d VEXTRACTI32X4 $0x2,%ZMM1,%XMM0 |
0xbec714 VMOVQ %XMM0,%R8 |
0xbec719 VMOVAPS (%R15,%R8,4),%XMM3 |
0xbec71f VSUBPS %XMM19,%XMM3,%XMM3 |
0xbec725 VMOVAPS %XMM3,(%R15,%R8,4) |
0xbec72b VPEXTRQ $0x1,%XMM0,%R8 |
0xbec731 VMOVAPS (%R15,%R8,4),%XMM0 |
0xbec737 VSUBPS %XMM25,%XMM0,%XMM0 |
0xbec73d VMOVAPS %XMM0,(%R15,%R8,4) |
0xbec743 VEXTRACTI32X4 $0x3,%ZMM1,%XMM0 |
0xbec74a VMOVQ %XMM0,%R8 |
0xbec74f VMOVAPS (%R15,%R8,4),%XMM1 |
0xbec755 VSUBPS %XMM23,%XMM1,%XMM1 |
0xbec75b VMOVAPS %XMM1,(%R15,%R8,4) |
0xbec761 VPEXTRQ $0x1,%XMM0,%R8 |
0xbec767 VMOVAPS (%R15,%R8,4),%XMM0 |
0xbec76d VSUBPS %XMM2,%XMM0,%XMM0 |
0xbec771 VMOVAPS %XMM0,(%R15,%R8,4) |
0xbec777 ADD $0x30,%RDI |
0xbec77b CMP %RSI,%RDI |
0xbec77e JAE bedab0 |
0xbec784 MOV %RDI,%R8 |
0xbec787 XOR %R9D,%R9D |
0xbec78a MOV %EDI,%R10D |
0xbec78d JMP bec7f1 |
(17098) 0xbec790 LEA (%R14,%R14,2),%R14 |
(17098) 0xbec794 SAL $0x4,%R14 |
(17098) 0xbec798 MOV 0x50(%RSP),%RCX |
(17098) 0xbec79d VMOVSS (%RCX,%R14,1),%XMM0 |
(17098) 0xbec7a3 VMOVSS %XMM0,0x280(%RSP,%R9,4) |
(17098) 0xbec7ad VMOVSS 0x4(%RCX,%R14,1),%XMM0 |
(17098) 0xbec7b4 VMOVSS %XMM0,0x2c0(%RSP,%R9,4) |
(17098) 0xbec7be VMOVSS (%RAX,%R11,4),%XMM0 |
(17098) 0xbec7c4 VMULSS (%RAX,%RBX,4),%XMM0,%XMM0 |
(17098) 0xbec7c9 VMOVSS %XMM0,0x300(%RSP,%R9,4) |
(17098) 0xbec7d3 LEA 0x3(%R10),%R11D |
(17098) 0xbec7d7 CMP 0x48(%RSP),%R11D |
(17098) 0xbec7dc CMOVL %R11D,%R10D |
(17098) 0xbec7e0 INC %R9 |
(17098) 0xbec7e3 ADD $0x3,%R8 |
(17098) 0xbec7e7 CMP $0x10,%R9 |
(17098) 0xbec7eb JE bec210 |
(17098) 0xbec7f1 MOVSXD %R10D,%RBX |
(17098) 0xbec7f4 MOVSXD (%R12,%RBX,4),%R14 |
(17098) 0xbec7f8 MOVSXD 0x4(%R12,%RBX,4),%R11 |
(17098) 0xbec7fd MOV %R11D,0xc0(%RSP,%R9,4) |
(17098) 0xbec805 MOVSXD 0x8(%R12,%RBX,4),%RBX |
(17098) 0xbec80a MOV %EBX,0x100(%RSP,%R9,4) |
(17098) 0xbec812 CMP %RSI,%R8 |
(17098) 0xbec815 JB bec790 |
(17098) 0xbec81b MOVL $0,0x280(%RSP,%R9,4) |
(17098) 0xbec827 MOVL $0,0x2c0(%RSP,%R9,4) |
(17098) 0xbec833 MOVL $0,0x300(%RSP,%R9,4) |
(17098) 0xbec83f INC %R9 |
(17098) 0xbec842 ADD $0x3,%R8 |
(17098) 0xbec846 CMP $0x10,%R9 |
(17098) 0xbec84a JNE bec7f1 |
0xbec84c JMP bec210 |
/home/eoseret/gromacs-2024.2/src/gromacs/listed_forces/pairs.cpp: 873 - 911 |
-------------------------------------------------------------------------------- |
873: for (int i = 0; i < nbonds; i += pack_size * nfa1) |
[...] |
879: for (int s = 0; s < pack_size; s++) |
880: { |
881: int itype = iatoms[iu]; |
882: ai[s] = iatoms[iu + 1]; |
883: aj[s] = iatoms[iu + 2]; |
884: |
885: if (i + s * nfa1 < nbonds) |
886: { |
887: coeff[0 * pack_size + s] = iparams[itype].lj14.c6A; |
888: coeff[1 * pack_size + s] = iparams[itype].lj14.c12A; |
889: coeff[2 * pack_size + s] = charge[ai[s]] * charge[aj[s]]; |
890: |
891: /* Avoid indexing the iatoms array out of bounds. |
892: * We pad the coordinate indices with the last atom pair. |
893: */ |
894: if (iu + nfa1 < nbonds) |
[...] |
902: coeff[0 * pack_size + s] = 0; |
903: coeff[1 * pack_size + s] = 0; |
904: coeff[2 * pack_size + s] = 0; |
[...] |
910: gatherLoadUTranspose<3>(reinterpret_cast<const real*>(x), ai, &xi[XX], &xi[YY], &xi[ZZ]); |
911: gatherLoadUTranspose<3>(reinterpret_cast<const real*>(x), aj, &xj[XX], &xj[YY], &xj[ZZ]); |
/home/eoseret/gromacs-2024.2/src/gromacs/simd/include/gromacs/simd/impl_x86_avx_512/impl_x86_avx_512_util_float.h: 68 - 291 |
-------------------------------------------------------------------------------- |
68: return _mm512_slli_epi32(x.simdInternal_, 2); |
[...] |
113: v->simdInternal_ = _mm512_i32gather_ps(offset.simdInternal_, base, sizeof(float) * align_); |
[...] |
194: t5 = _mm512_unpacklo_ps(v0.simdInternal_, v2.simdInternal_); |
195: t6 = _mm512_unpackhi_ps(v0.simdInternal_, v2.simdInternal_); |
196: t7 = _mm512_unpacklo_ps(v1.simdInternal_, _mm512_setzero_ps()); |
197: t8 = _mm512_unpackhi_ps(v1.simdInternal_, _mm512_setzero_ps()); |
198: t[0] = _mm512_unpacklo_ps(t5, t7); // x0 y0 z0 0 | x4 y4 z4 0 |
199: t[1] = _mm512_unpackhi_ps(t5, t7); // x1 y1 z1 0 | x5 y5 z5 0 |
200: t[2] = _mm512_unpacklo_ps(t6, t8); // x2 y2 z2 0 | x6 y6 z6 0 |
201: t[3] = _mm512_unpackhi_ps(t6, t8); // x3 y3 z3 0 | x7 y7 z7 0 |
202: if (align % 4 == 0) |
203: { |
204: for (i = 0; i < 4; i++) |
205: { |
206: _mm_store_ps(base + o[i], |
207: _mm_add_ps(_mm_load_ps(base + o[i]), _mm512_castps512_ps128(t[i]))); |
208: _mm_store_ps(base + o[4 + i], |
209: _mm_add_ps(_mm_load_ps(base + o[4 + i]), _mm512_extractf32x4_ps(t[i], 1))); |
210: _mm_store_ps(base + o[8 + i], |
211: _mm_add_ps(_mm_load_ps(base + o[8 + i]), _mm512_extractf32x4_ps(t[i], 2))); |
212: _mm_store_ps(base + o[12 + i], |
213: _mm_add_ps(_mm_load_ps(base + o[12 + i]), _mm512_extractf32x4_ps(t[i], 3))); |
[...] |
284: _mm_store_ps(base + o[i], |
285: _mm_sub_ps(_mm_load_ps(base + o[i]), _mm512_castps512_ps128(t[i]))); |
286: _mm_store_ps(base + o[4 + i], |
287: _mm_sub_ps(_mm_load_ps(base + o[4 + i]), _mm512_extractf32x4_ps(t[i], 1))); |
288: _mm_store_ps(base + o[8 + i], |
289: _mm_sub_ps(_mm_load_ps(base + o[8 + i]), _mm512_extractf32x4_ps(t[i], 2))); |
290: _mm_store_ps(base + o[12 + i], |
291: _mm_sub_ps(_mm_load_ps(base + o[12 + i]), _mm512_extractf32x4_ps(t[i], 3))); |
/home/eoseret/gromacs-2024.2/src/gromacs/simd/include/gromacs/simd/impl_x86_avx_512/impl_x86_avx_512_simd_float.h: 181 - 451 |
-------------------------------------------------------------------------------- |
181: return { _mm512_add_ps(a.simdInternal_, b.simdInternal_) }; |
182: } |
183: |
184: static inline SimdFloat gmx_simdcall operator-(SimdFloat a, SimdFloat b) |
185: { |
186: return { _mm512_sub_ps(a.simdInternal_, b.simdInternal_) }; |
[...] |
197: return { _mm512_mul_ps(a.simdInternal_, b.simdInternal_) }; |
198: } |
199: |
200: static inline SimdFloat gmx_simdcall fma(SimdFloat a, SimdFloat b, SimdFloat c) |
201: { |
202: return { _mm512_fmadd_ps(a.simdInternal_, b.simdInternal_, c.simdInternal_) }; |
203: } |
204: |
205: static inline SimdFloat gmx_simdcall fms(SimdFloat a, SimdFloat b, SimdFloat c) |
206: { |
207: return { _mm512_fmsub_ps(a.simdInternal_, b.simdInternal_, c.simdInternal_) }; |
[...] |
224: return { _mm512_rsqrt14_ps(x.simdInternal_) }; |
[...] |
279: return { _mm512_roundscale_ps(x.simdInternal_, 0) }; |
[...] |
451: return { _mm512_mullo_epi32(a.simdInternal_, b.simdInternal_) }; |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►100.00+ | (anonymous namespace)::calc_on[...] | listed_forces.cpp:334 | libgromacs_mpi.so.9.0.0 |
○ | .omp_outlined.#0xbe5d40 | listed_forces.cpp:428 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | ListedForces::calculate(gmx_wa[...] | listed_forces.cpp:387 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:2047 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►100.00+ | (anonymous namespace)::calc_on[...] | listed_forces.cpp:334 | libgromacs_mpi.so.9.0.0 |
○ | .omp_outlined.#0xbe5d40 | listed_forces.cpp:428 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | ListedForces::calculate(gmx_wa[...] | listed_forces.cpp:387 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:2047 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►100.00+ | (anonymous namespace)::calc_on[...] | listed_forces.cpp:334 | libgromacs_mpi.so.9.0.0 |
○ | .omp_outlined.#0xbe5d40 | listed_forces.cpp:428 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | ListedForces::calculate(gmx_wa[...] | listed_forces.cpp:387 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:2047 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►63.38+ | (anonymous namespace)::calc_on[...] | listed_forces.cpp:334 | libgromacs_mpi.so.9.0.0 |
○ | .omp_outlined.#0xbe5d40 | listed_forces.cpp:428 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | ListedForces::calculate(gmx_wa[...] | listed_forces.cpp:387 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:2047 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 | |
►19.19+ | (anonymous namespace)::calc_on[...] | listed_forces.cpp:334 | libgromacs_mpi.so.9.0.0 |
○ | .omp_outlined.#0xbe5d40 | listed_forces.cpp:428 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | ListedForces::calculate(gmx_wa[...] | listed_forces.cpp:387 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:2047 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
►17.43+ | (anonymous namespace)::calc_on[...] | listed_forces.cpp:334 | libgromacs_mpi.so.9.0.0 |
○ | .omp_outlined.#0xbe5d40 | listed_forces.cpp:428 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | ListedForces::calculate(gmx_wa[...] | listed_forces.cpp:387 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:2047 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►78.14+ | (anonymous namespace)::calc_on[...] | listed_forces.cpp:334 | libgromacs_mpi.so.9.0.0 |
○ | .omp_outlined.#0xbe5d40 | listed_forces.cpp:428 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | ListedForces::calculate(gmx_wa[...] | listed_forces.cpp:387 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:2047 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 | |
►18.88+ | (anonymous namespace)::calc_on[...] | listed_forces.cpp:334 | libgromacs_mpi.so.9.0.0 |
○ | .omp_outlined.#0xbe5d40 | listed_forces.cpp:428 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | ListedForces::calculate(gmx_wa[...] | listed_forces.cpp:387 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:2047 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
►2.99+ | (anonymous namespace)::calc_on[...] | listed_forces.cpp:334 | libgromacs_mpi.so.9.0.0 |
○ | .omp_outlined.#0xbe5d40 | listed_forces.cpp:428 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | ListedForces::calculate(gmx_wa[...] | listed_forces.cpp:387 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:2047 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►79.27+ | (anonymous namespace)::calc_on[...] | listed_forces.cpp:334 | libgromacs_mpi.so.9.0.0 |
○ | .omp_outlined.#0xbe5d40 | listed_forces.cpp:428 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | ListedForces::calculate(gmx_wa[...] | listed_forces.cpp:387 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:2047 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 | |
►19.47+ | (anonymous namespace)::calc_on[...] | listed_forces.cpp:334 | libgromacs_mpi.so.9.0.0 |
○ | .omp_outlined.#0xbe5d40 | listed_forces.cpp:428 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | ListedForces::calculate(gmx_wa[...] | listed_forces.cpp:387 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:2047 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
►1.26+ | (anonymous namespace)::calc_on[...] | listed_forces.cpp:334 | libgromacs_mpi.so.9.0.0 |
○ | .omp_outlined.#0xbe5d40 | listed_forces.cpp:428 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | ListedForces::calculate(gmx_wa[...] | listed_forces.cpp:387 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:2047 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►97.16+ | (anonymous namespace)::calc_on[...] | listed_forces.cpp:334 | libgromacs_mpi.so.9.0.0 |
○ | .omp_outlined.#0xbe5d40 | listed_forces.cpp:428 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | ListedForces::calculate(gmx_wa[...] | listed_forces.cpp:387 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:2047 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 | |
►2.84+ | (anonymous namespace)::calc_on[...] | listed_forces.cpp:334 | libgromacs_mpi.so.9.0.0 |
○ | .omp_outlined.#0xbe5d40 | listed_forces.cpp:428 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | ListedForces::calculate(gmx_wa[...] | listed_forces.cpp:387 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:2047 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►80.61+ | (anonymous namespace)::calc_on[...] | listed_forces.cpp:334 | libgromacs_mpi.so.9.0.0 |
○ | .omp_outlined.#0xbe5d40 | listed_forces.cpp:428 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | ListedForces::calculate(gmx_wa[...] | listed_forces.cpp:387 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:2047 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 | |
►12.16+ | (anonymous namespace)::calc_on[...] | listed_forces.cpp:334 | libgromacs_mpi.so.9.0.0 |
○ | .omp_outlined.#0xbe5d40 | listed_forces.cpp:428 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | ListedForces::calculate(gmx_wa[...] | listed_forces.cpp:387 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:2047 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
►7.12+ | (anonymous namespace)::calc_on[...] | listed_forces.cpp:334 | libgromacs_mpi.so.9.0.0 |
○ | .omp_outlined.#0xbe5d40 | listed_forces.cpp:428 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | ListedForces::calculate(gmx_wa[...] | listed_forces.cpp:387 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:2047 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►100.00+ | (anonymous namespace)::calc_on[...] | listed_forces.cpp:334 | libgromacs_mpi.so.9.0.0 |
○ | .omp_outlined.#0xbe5d40 | listed_forces.cpp:428 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | ListedForces::calculate(gmx_wa[...] | listed_forces.cpp:387 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:2047 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 |
Path / |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.02 |
CQA speedup if FP arith vectorized | 1.09 |
CQA speedup if fully vectorized | 1.66 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.36 |
Bottlenecks | |
Function | do_pairs(int, int, int const*, t_iparams const*, float const (*) [3], float (*) [4], float (*) [3], t_pbc const*, float const*, float*, gmx::ArrayRef |
Source | pairs.cpp:873-873,pairs.cpp:879-879,pairs.cpp:910-911,impl_x86_avx_512_util_float.h:68-68,impl_x86_avx_512_util_float.h:113-113,impl_x86_avx_512_util_float.h:194-201,impl_x86_avx_512_util_float.h:206-213,impl_x86_avx_512_util_float.h:284-291,impl_x86_avx_512_simd_float.h:181-181,impl_x86_avx_512_simd_float.h:186-186,impl_x86_avx_512_simd_float.h:197-197,impl_x86_avx_512_simd_float.h:202-202,impl_x86_avx_512_simd_float.h:207-207,impl_x86_avx_512_simd_float.h:224-224,impl_x86_avx_512_simd_float.h:279-279,impl_x86_avx_512_simd_float.h:451-451 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 120.92 |
CQA cycles if no scalar integer | 118.83 |
CQA cycles if FP arith vectorized | 111.43 |
CQA cycles if fully vectorized | 73.00 |
Front-end cycles | 120.92 |
P0 cycles | 1.25 |
P1 cycles | 0.75 |
P2 cycles | 0.75 |
P3 cycles | 0.50 |
P4 cycles | 1.25 |
P5 cycles | 28.33 |
P6 cycles | 28.33 |
P7 cycles | 28.33 |
P8 cycles | 89.00 |
P9 cycles | 83.08 |
P10 cycles | 77.08 |
P11 cycles | 76.83 |
P12 cycles | 86.00 |
P13 cycles | 86.00 |
DIV/SQRT cycles | 0.00 |
Inter-iter dependencies cycles | 1 |
FE+BE cycles (UFS) | NA |
Stall cycles (UFS) | NA |
Nb insns | 229.50 |
Nb uops | 725.50 |
Nb loads | 49.00 |
Nb stores | 32.00 |
Nb stack references | 11.00 |
FLOP/cycle | 6.88 |
Nb FLOP add-sub | 304.00 |
Nb FLOP mul | 416.00 |
Nb FLOP fma | 48.00 |
Nb FLOP div | 0.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 16.00 |
Bytes/cycle | 17.00 |
Bytes prefetched | 0.00 |
Bytes loaded | 1544.00 |
Bytes stored | 512.00 |
Stride 0 | 2.50 |
Stride 1 | 0.00 |
Stride n | 2.00 |
Stride unknown | 1.00 |
Stride indirect | 3.00 |
Vectorization ratio all | 84.33 |
Vectorization ratio load | 100.00 |
Vectorization ratio store | 100.00 |
Vectorization ratio mul | 100.00 |
Vectorization ratio add_sub | 100.00 |
Vectorization ratio fma | 100.00 |
Vectorization ratio div_sqrt | 100.00 |
Vectorization ratio other | 62.64 |
Vector-efficiency ratio all | 45.45 |
Vector-efficiency ratio load | 50.00 |
Vector-efficiency ratio store | 25.00 |
Vector-efficiency ratio mul | 100.00 |
Vector-efficiency ratio add_sub | 44.19 |
Vector-efficiency ratio fma | 100.00 |
Vector-efficiency ratio div_sqrt | 100.00 |
Vector-efficiency ratio other | 35.85 |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.02 |
CQA speedup if FP arith vectorized | 1.08 |
CQA speedup if fully vectorized | 1.66 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.36 |
Bottlenecks | micro-operation queue, |
Function | do_pairs(int, int, int const*, t_iparams const*, float const (*) [3], float (*) [4], float (*) [3], t_pbc const*, float const*, float*, gmx::ArrayRef |
Source | pairs.cpp:873-873,pairs.cpp:879-879,pairs.cpp:910-911,impl_x86_avx_512_util_float.h:68-68,impl_x86_avx_512_util_float.h:113-113,impl_x86_avx_512_util_float.h:194-201,impl_x86_avx_512_util_float.h:206-213,impl_x86_avx_512_util_float.h:284-291,impl_x86_avx_512_simd_float.h:181-181,impl_x86_avx_512_simd_float.h:186-186,impl_x86_avx_512_simd_float.h:197-197,impl_x86_avx_512_simd_float.h:202-202,impl_x86_avx_512_simd_float.h:207-207,impl_x86_avx_512_simd_float.h:224-224,impl_x86_avx_512_simd_float.h:279-279,impl_x86_avx_512_simd_float.h:451-451 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 121.00 |
CQA cycles if no scalar integer | 118.83 |
CQA cycles if FP arith vectorized | 111.53 |
CQA cycles if fully vectorized | 73.00 |
Front-end cycles | 121.00 |
P0 cycles | 1.50 |
P1 cycles | 0.75 |
P2 cycles | 0.75 |
P3 cycles | 0.50 |
P4 cycles | 1.50 |
P5 cycles | 28.33 |
P6 cycles | 28.33 |
P7 cycles | 28.33 |
P8 cycles | 89.00 |
P9 cycles | 83.08 |
P10 cycles | 77.08 |
P11 cycles | 76.83 |
P12 cycles | 86.00 |
P13 cycles | 86.00 |
DIV/SQRT cycles | 0.00 |
Inter-iter dependencies cycles | 1 |
FE+BE cycles (UFS) | NA |
Stall cycles (UFS) | NA |
Nb insns | 230.00 |
Nb uops | 726.00 |
Nb loads | 49.00 |
Nb stores | 32.00 |
Nb stack references | 11.00 |
FLOP/cycle | 6.88 |
Nb FLOP add-sub | 304.00 |
Nb FLOP mul | 416.00 |
Nb FLOP fma | 48.00 |
Nb FLOP div | 0.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 16.00 |
Bytes/cycle | 16.99 |
Bytes prefetched | 0.00 |
Bytes loaded | 1544.00 |
Bytes stored | 512.00 |
Stride 0 | 2.00 |
Stride 1 | 0.00 |
Stride n | 2.00 |
Stride unknown | 0.00 |
Stride indirect | 3.00 |
Vectorization ratio all | 84.33 |
Vectorization ratio load | 100.00 |
Vectorization ratio store | 100.00 |
Vectorization ratio mul | 100.00 |
Vectorization ratio add_sub | 100.00 |
Vectorization ratio fma | 100.00 |
Vectorization ratio div_sqrt | 100.00 |
Vectorization ratio other | 62.64 |
Vector-efficiency ratio all | 45.45 |
Vector-efficiency ratio load | 50.00 |
Vector-efficiency ratio store | 25.00 |
Vector-efficiency ratio mul | 100.00 |
Vector-efficiency ratio add_sub | 44.19 |
Vector-efficiency ratio fma | 100.00 |
Vector-efficiency ratio div_sqrt | 100.00 |
Vector-efficiency ratio other | 35.85 |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.02 |
CQA speedup if FP arith vectorized | 1.09 |
CQA speedup if fully vectorized | 1.66 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.36 |
Bottlenecks | micro-operation queue, |
Function | do_pairs(int, int, int const*, t_iparams const*, float const (*) [3], float (*) [4], float (*) [3], t_pbc const*, float const*, float*, gmx::ArrayRef |
Source | pairs.cpp:873-873,pairs.cpp:879-879,pairs.cpp:910-911,impl_x86_avx_512_util_float.h:68-68,impl_x86_avx_512_util_float.h:113-113,impl_x86_avx_512_util_float.h:194-201,impl_x86_avx_512_util_float.h:206-213,impl_x86_avx_512_util_float.h:284-291,impl_x86_avx_512_simd_float.h:181-181,impl_x86_avx_512_simd_float.h:186-186,impl_x86_avx_512_simd_float.h:197-197,impl_x86_avx_512_simd_float.h:202-202,impl_x86_avx_512_simd_float.h:207-207,impl_x86_avx_512_simd_float.h:224-224,impl_x86_avx_512_simd_float.h:279-279,impl_x86_avx_512_simd_float.h:451-451 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 120.83 |
CQA cycles if no scalar integer | 118.83 |
CQA cycles if FP arith vectorized | 111.34 |
CQA cycles if fully vectorized | 73.00 |
Front-end cycles | 120.83 |
P0 cycles | 1.00 |
P1 cycles | 0.75 |
P2 cycles | 0.75 |
P3 cycles | 0.50 |
P4 cycles | 1.00 |
P5 cycles | 28.33 |
P6 cycles | 28.33 |
P7 cycles | 28.33 |
P8 cycles | 89.00 |
P9 cycles | 83.08 |
P10 cycles | 77.08 |
P11 cycles | 76.83 |
P12 cycles | 86.00 |
P13 cycles | 86.00 |
DIV/SQRT cycles | 0.00 |
Inter-iter dependencies cycles | 1 |
FE+BE cycles (UFS) | NA |
Stall cycles (UFS) | NA |
Nb insns | 229.00 |
Nb uops | 725.00 |
Nb loads | 49.00 |
Nb stores | 32.00 |
Nb stack references | 11.00 |
FLOP/cycle | 6.89 |
Nb FLOP add-sub | 304.00 |
Nb FLOP mul | 416.00 |
Nb FLOP fma | 48.00 |
Nb FLOP div | 0.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 16.00 |
Bytes/cycle | 17.02 |
Bytes prefetched | 0.00 |
Bytes loaded | 1544.00 |
Bytes stored | 512.00 |
Stride 0 | 3.00 |
Stride 1 | 0.00 |
Stride n | 2.00 |
Stride unknown | 2.00 |
Stride indirect | 3.00 |
Vectorization ratio all | 84.33 |
Vectorization ratio load | 100.00 |
Vectorization ratio store | 100.00 |
Vectorization ratio mul | 100.00 |
Vectorization ratio add_sub | 100.00 |
Vectorization ratio fma | 100.00 |
Vectorization ratio div_sqrt | 100.00 |
Vectorization ratio other | 62.64 |
Vector-efficiency ratio all | 45.45 |
Vector-efficiency ratio load | 50.00 |
Vector-efficiency ratio store | 25.00 |
Vector-efficiency ratio mul | 100.00 |
Vector-efficiency ratio add_sub | 44.19 |
Vector-efficiency ratio fma | 100.00 |
Vector-efficiency ratio div_sqrt | 100.00 |
Vector-efficiency ratio other | 35.85 |
Path / |
Function | do_pairs(int, int, int const*, t_iparams const*, float const (*) [3], float (*) [4], float (*) [3], t_pbc const*, float const*, float*, gmx::ArrayRef |
Source file and lines | pairs.cpp:873-911 |
Module | libgromacs_mpi.so.9.0.0 |
nb instructions | 229.50 |
nb uops | 725.50 |
loop length | 1409.50 |
used x86 registers | 10 |
used mmx registers | 0 |
used xmm registers | 19 |
used ymm registers | 9 |
used zmm registers | 26 |
nb stack references | 11 |
ADD-SUB / MUL ratio | 1.65 |
micro-operation queue | 120.92 cycles |
front end | 120.92 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 1.25 | 0.75 | 0.75 | 0.50 | 1.25 | 25.00 | 25.00 | 25.00 | 71.50 | 71.58 | 71.58 | 71.33 | 86.00 | 86.00 |
cycles | 1.25 | 0.75 | 0.75 | 0.50 | 1.25 | 28.33 | 28.33 | 28.33 | 89.00 | 83.08 | 77.08 | 76.83 | 86.00 | 86.00 |
Cycles executing div or sqrt instructions | NA |
Longest recurrence chain latency (RecMII) | 1.00 |
Front-end | 120.92 |
Dispatch | 89.00 |
Data deps. | 1.00 |
Overall L1 | 120.92 |
all | 43% |
load | 100% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 100% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 39% |
all | 100% |
load | 100% |
store | 100% |
mul | 100% |
add-sub | 100% |
fma | 100% |
div/sqrt | 100% |
other | 100% |
all | 84% |
load | 100% |
store | 100% |
mul | 100% |
add-sub | 100% |
fma | 100% |
div/sqrt | 100% |
other | 62% |
all | 31% |
load | 100% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 100% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 27% |
all | 50% |
load | 47% |
store | 25% |
mul | 100% |
add-sub | 44% |
fma | 100% |
div/sqrt | 100% |
other | 50% |
all | 45% |
load | 50% |
store | 25% |
mul | 100% |
add-sub | 44% |
fma | 100% |
div/sqrt | 100% |
other | 35% |
Function | do_pairs(int, int, int const*, t_iparams const*, float const (*) [3], float (*) [4], float (*) [3], t_pbc const*, float const*, float*, gmx::ArrayRef |
Source file and lines | pairs.cpp:873-911 |
Module | libgromacs_mpi.so.9.0.0 |
nb instructions | 230 |
nb uops | 726 |
loop length | 1412 |
used x86 registers | 10 |
used mmx registers | 0 |
used xmm registers | 19 |
used ymm registers | 9 |
used zmm registers | 26 |
nb stack references | 11 |
ADD-SUB / MUL ratio | 1.65 |
micro-operation queue | 121.00 cycles |
front end | 121.00 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 1.50 | 0.75 | 0.75 | 0.50 | 1.50 | 25.00 | 25.00 | 25.00 | 71.50 | 71.58 | 71.58 | 71.33 | 86.00 | 86.00 |
cycles | 1.50 | 0.75 | 0.75 | 0.50 | 1.50 | 28.33 | 28.33 | 28.33 | 89.00 | 83.08 | 77.08 | 76.83 | 86.00 | 86.00 |
Cycles executing div or sqrt instructions | NA |
Longest recurrence chain latency (RecMII) | 1.00 |
Front-end | 121.00 |
Dispatch | 89.00 |
Data deps. | 1.00 |
Overall L1 | 121.00 |
all | 43% |
load | 100% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 100% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 39% |
all | 100% |
load | 100% |
store | 100% |
mul | 100% |
add-sub | 100% |
fma | 100% |
div/sqrt | 100% |
other | 100% |
all | 84% |
load | 100% |
store | 100% |
mul | 100% |
add-sub | 100% |
fma | 100% |
div/sqrt | 100% |
other | 62% |
all | 31% |
load | 100% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 100% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 27% |
all | 50% |
load | 47% |
store | 25% |
mul | 100% |
add-sub | 44% |
fma | 100% |
div/sqrt | 100% |
other | 50% |
all | 45% |
load | 50% |
store | 25% |
mul | 100% |
add-sub | 44% |
fma | 100% |
div/sqrt | 100% |
other | 35% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput | Vectorization |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
VMOVDQA64 0xc0(%RSP),%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VPMULLD %ZMM10,%ZMM0,%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
KXNORW %K0,%K0,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | N/A |
VXORPS %XMM2,%XMM2,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
VGATHERDPS (%R13,%ZMM1,4),%ZMM2{%K1} | 81 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.50 | 6.67 | 7.17 | 5.67 | 9 | 9 | 0-21 | 16.56 | vect (100.0%) |
KXNORW %K0,%K0,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | N/A |
VXORPS %XMM19,%XMM19,%XMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
MOV 0x88(%RSP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VGATHERDPS (%RCX,%ZMM1,4),%ZMM19{%K1} | 81 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.50 | 6.67 | 7.17 | 5.67 | 9 | 9 | 0-21 | 16.56 | vect (100.0%) |
KXNORW %K0,%K0,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | N/A |
VXORPS %XMM20,%XMM20,%XMM20 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
VGATHERDPS (%RDX,%ZMM1,4),%ZMM20{%K1} | 81 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.50 | 6.67 | 7.17 | 5.67 | 9 | 9 | 0-21 | 16.56 | vect (100.0%) |
VMOVDQA64 0x100(%RSP),%ZMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VPMULLD %ZMM10,%ZMM18,%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
KXNORW %K0,%K0,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | N/A |
VXORPS %XMM21,%XMM21,%XMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
VGATHERDPS (%R13,%ZMM1,4),%ZMM21{%K1} | 81 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.50 | 6.67 | 7.17 | 5.67 | 9 | 9 | 0-21 | 16.56 | vect (100.0%) |
KXNORW %K0,%K0,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | N/A |
VXORPS %XMM22,%XMM22,%XMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
VGATHERDPS (%RCX,%ZMM1,4),%ZMM22{%K1} | 81 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.50 | 6.67 | 7.17 | 5.67 | 9 | 9 | 0-21 | 16.56 | vect (100.0%) |
KXNORW %K0,%K0,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | N/A |
VXORPS %XMM23,%XMM23,%XMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
VGATHERDPS (%RDX,%ZMM1,4),%ZMM23{%K1} | 81 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.50 | 6.67 | 7.17 | 5.67 | 9 | 9 | 0-21 | 16.56 | vect (100.0%) |
VSUBPS %ZMM21,%ZMM2,%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VSUBPS %ZMM22,%ZMM19,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS 0x2c0(%RSP),%ZMM12,%ZMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VSUBPS %ZMM23,%ZMM20,%ZMM20 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS 0x340(%RSP),%ZMM20,%ZMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VRNDSCALEPS $0,%ZMM21,%ZMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS 0x180(%RSP),%ZMM21,%ZMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VSUBPS %ZMM22,%ZMM1,%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS 0x140(%RSP),%ZMM21,%ZMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VSUBPS %ZMM22,%ZMM2,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS 0x200(%RSP),%ZMM21,%ZMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM5,%ZMM2,%ZMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VRNDSCALEPS $0,%ZMM22,%ZMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VSUBPS %ZMM21,%ZMM20,%ZMM20 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM6,%ZMM22,%ZMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VSUBPS %ZMM21,%ZMM1,%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM7,%ZMM22,%ZMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VSUBPS %ZMM21,%ZMM2,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM8,%ZMM1,%ZMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VRNDSCALEPS $0,%ZMM21,%ZMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM9,%ZMM21,%ZMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VSUBPS %ZMM21,%ZMM1,%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM1,%ZMM1,%ZMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM2,%ZMM2,%ZMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VADDPS %ZMM22,%ZMM21,%ZMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM20,%ZMM20,%ZMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VADDPS %ZMM22,%ZMM21,%ZMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VRSQRT14PS %ZMM21,%ZMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 1 | vect (100.0%) |
VMULPS %ZMM21,%ZMM22,%ZMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM13,%ZMM22,%ZMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VFMADD213PS %ZMM14,%ZMM22,%ZMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VMULPS %ZMM21,%ZMM23,%ZMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM21,%ZMM21,%ZMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM22,%ZMM22,%ZMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM23,%ZMM22,%ZMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMOVAPS 0x240(%RSP),%ZMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS 0x300(%RSP),%ZMM3,%ZMM24 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM21,%ZMM24,%ZMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS 0x280(%RSP),%ZMM15,%ZMM24 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VFMADD231PS %ZMM19,%ZMM23,%ZMM24 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VFMADD213PS %ZMM21,%ZMM23,%ZMM24 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VMULPS %ZMM24,%ZMM22,%ZMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM19,%ZMM1,%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM19,%ZMM2,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM19,%ZMM20,%ZMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VPSLLD $0x2,%ZMM0,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 1 | vect (100.0%) |
VPERMD %ZMM0,%ZMM16,%ZMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 5 | 1 | vect (100.0%) |
VPERMD %ZMM0,%ZMM17,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 5 | 1 | vect (100.0%) |
VUNPCKLPS %ZMM19,%ZMM1,%ZMM20 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.67 | 0.67 | 0.67 | 0 | 0 | 1 | 0.67 | vect (50.0%) |
VUNPCKHPS %ZMM19,%ZMM1,%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.67 | 0.67 | 0.67 | 0 | 0 | 1 | 0.67 | vect (50.0%) |
VUNPCKLPS %ZMM11,%ZMM2,%ZMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.67 | 0.67 | 0.67 | 0 | 0 | 1 | 0.67 | vect (50.0%) |
VUNPCKHPS %ZMM11,%ZMM2,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.67 | 0.67 | 0.67 | 0 | 0 | 1 | 0.67 | vect (50.0%) |
VUNPCKLPS %ZMM19,%ZMM20,%ZMM30 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.67 | 0.67 | 0.67 | 0 | 0 | 1 | 0.67 | vect (50.0%) |
VUNPCKHPS %ZMM19,%ZMM20,%ZMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.67 | 0.67 | 0.67 | 0 | 0 | 1 | 0.67 | vect (50.0%) |
VUNPCKLPS %ZMM2,%ZMM1,%ZMM20 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.67 | 0.67 | 0.67 | 0 | 0 | 1 | 0.67 | vect (50.0%) |
VUNPCKHPS %ZMM2,%ZMM1,%ZMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.67 | 0.67 | 0.67 | 0 | 0 | 1 | 0.67 | vect (50.0%) |
VEXTRACTF32X4 $0x1,%YMM30,%XMM31 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 4 | 0.25 | vect (25.0%) |
VEXTRACTF32X4 $0x2,%ZMM30,%XMM28 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 4 | 1 | vect (25.0%) |
VEXTRACTF32X4 $0x3,%ZMM30,%XMM26 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 4 | 1 | vect (25.0%) |
VEXTRACTF32X4 $0x1,%YMM21,%XMM24 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 4 | 0.25 | vect (25.0%) |
VEXTRACTF32X4 $0x2,%ZMM21,%XMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 4 | 1 | vect (25.0%) |
VPMOVSXDQ %YMM0,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 4 | 1 | vect (50.0%) |
VMOVQ %XMM0,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 | scal (12.5%) |
VADDPS (%R15,%R8,4),%XMM30,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VMOVAPS %XMM1,(%R15,%R8,4) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 | vect (25.0%) |
VPEXTRQ $0x1,%XMM0,%R8 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 6 | 1 | scal (12.5%) |
VADDPS (%R15,%R8,4),%XMM31,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VMOVAPS %XMM1,(%R15,%R8,4) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 | vect (25.0%) |
VEXTRACTI128 $0x1,%YMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | vect (25.0%) |
VMOVQ %XMM1,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 | scal (12.5%) |
VADDPS (%R15,%R8,4),%XMM28,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VMOVAPS %XMM2,(%R15,%R8,4) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 | vect (25.0%) |
VPEXTRQ $0x1,%XMM1,%R8 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 6 | 1 | scal (12.5%) |
VADDPS (%R15,%R8,4),%XMM26,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VMOVAPS %XMM1,(%R15,%R8,4) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 | vect (25.0%) |
VEXTRACTI32X4 $0x2,%ZMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 4 | 1 | vect (25.0%) |
VMOVQ %XMM1,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 | scal (12.5%) |
VADDPS (%R15,%R8,4),%XMM21,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VMOVAPS %XMM2,(%R15,%R8,4) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 | vect (25.0%) |
VPEXTRQ $0x1,%XMM1,%R8 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 6 | 1 | scal (12.5%) |
VADDPS (%R15,%R8,4),%XMM24,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VMOVAPS %XMM1,(%R15,%R8,4) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 | vect (25.0%) |
VEXTRACTI32X4 $0x3,%ZMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 4 | 1 | vect (25.0%) |
VMOVQ %XMM0,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 | scal (12.5%) |
VADDPS (%R15,%R8,4),%XMM22,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VMOVAPS %XMM1,(%R15,%R8,4) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 | vect (25.0%) |
VPEXTRQ $0x1,%XMM0,%R8 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 6 | 1 | scal (12.5%) |
VEXTRACTF32X4 $0x3,%ZMM21,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 4 | 1 | vect (25.0%) |
VADDPS (%R15,%R8,4),%XMM1,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VMOVAPS %XMM0,(%R15,%R8,4) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 | vect (25.0%) |
VEXTRACTF32X4 $0x1,%YMM20,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 4 | 0.25 | vect (25.0%) |
VEXTRACTF32X4 $0x2,%ZMM20,%XMM29 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 4 | 1 | vect (25.0%) |
VEXTRACTF32X4 $0x3,%ZMM20,%XMM27 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 4 | 1 | vect (25.0%) |
VEXTRACTF32X4 $0x1,%YMM19,%XMM25 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 4 | 0.25 | vect (25.0%) |
VEXTRACTF32X4 $0x2,%ZMM19,%XMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 4 | 1 | vect (25.0%) |
VPMOVSXDQ %YMM3,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 4 | 1 | vect (50.0%) |
VMOVQ %XMM2,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 | scal (12.5%) |
VADDPS (%R15,%R8,4),%XMM20,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VMOVAPS %XMM3,(%R15,%R8,4) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 | vect (25.0%) |
VPEXTRQ $0x1,%XMM2,%R8 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 6 | 1 | scal (12.5%) |
VADDPS (%R15,%R8,4),%XMM0,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VMOVAPS %XMM3,(%R15,%R8,4) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 | vect (25.0%) |
VEXTRACTI128 $0x1,%YMM2,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | vect (25.0%) |
VMOVQ %XMM3,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 | scal (12.5%) |
VADDPS (%R15,%R8,4),%XMM29,%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VMOVAPS %XMM4,(%R15,%R8,4) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 | vect (25.0%) |
VPEXTRQ $0x1,%XMM3,%R8 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 6 | 1 | scal (12.5%) |
VADDPS (%R15,%R8,4),%XMM27,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VMOVAPS %XMM3,(%R15,%R8,4) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 | vect (25.0%) |
VEXTRACTI32X4 $0x2,%ZMM2,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 4 | 1 | vect (25.0%) |
VMOVQ %XMM3,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 | scal (12.5%) |
VADDPS (%R15,%R8,4),%XMM19,%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VMOVAPS %XMM4,(%R15,%R8,4) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 | vect (25.0%) |
VPEXTRQ $0x1,%XMM3,%R8 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 6 | 1 | scal (12.5%) |
VADDPS (%R15,%R8,4),%XMM25,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VMOVAPS %XMM3,(%R15,%R8,4) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 | vect (25.0%) |
VEXTRACTI32X4 $0x3,%ZMM2,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 4 | 1 | vect (25.0%) |
VMOVQ %XMM2,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 | scal (12.5%) |
VADDPS (%R15,%R8,4),%XMM23,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VMOVAPS %XMM3,(%R15,%R8,4) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 | vect (25.0%) |
VPEXTRQ $0x1,%XMM2,%R8 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 6 | 1 | scal (12.5%) |
VEXTRACTF32X4 $0x3,%ZMM19,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 4 | 1 | vect (25.0%) |
VADDPS (%R15,%R8,4),%XMM2,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VMOVAPS %XMM3,(%R15,%R8,4) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 | vect (25.0%) |
VPSLLD $0x2,%ZMM18,%ZMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 1 | vect (100.0%) |
VPERMD %ZMM3,%ZMM17,%ZMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 5 | 1 | vect (100.0%) |
VPMOVSXDQ %YMM4,%ZMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 4 | 1 | vect (50.0%) |
VMOVQ %XMM4,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 | scal (12.5%) |
VMOVAPS (%R15,%R8,4),%XMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VSUBPS %XMM30,%XMM18,%XMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VMOVAPS %XMM18,(%R15,%R8,4) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 | vect (25.0%) |
VPEXTRQ $0x1,%XMM4,%R8 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 6 | 1 | scal (12.5%) |
VMOVAPS (%R15,%R8,4),%XMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VSUBPS %XMM31,%XMM18,%XMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VMOVAPS %XMM18,(%R15,%R8,4) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 | vect (25.0%) |
VEXTRACTI32X4 $0x1,%YMM4,%XMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 4 | 0.25 | vect (25.0%) |
VMOVQ %XMM18,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 | scal (12.5%) |
VMOVAPS (%R15,%R8,4),%XMM30 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VSUBPS %XMM28,%XMM30,%XMM28 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VMOVAPS %XMM28,(%R15,%R8,4) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 | vect (25.0%) |
VPEXTRQ $0x1,%XMM18,%R8 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 6 | 1 | scal (12.5%) |
VMOVAPS (%R15,%R8,4),%XMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VSUBPS %XMM26,%XMM18,%XMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VMOVAPS %XMM18,(%R15,%R8,4) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 | vect (25.0%) |
VEXTRACTI32X4 $0x2,%ZMM4,%XMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 4 | 1 | vect (25.0%) |
VMOVQ %XMM18,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 | scal (12.5%) |
VMOVAPS (%R15,%R8,4),%XMM26 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VSUBPS %XMM21,%XMM26,%XMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VMOVAPS %XMM21,(%R15,%R8,4) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 | vect (25.0%) |
VPEXTRQ $0x1,%XMM18,%R8 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 6 | 1 | scal (12.5%) |
VMOVAPS (%R15,%R8,4),%XMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VSUBPS %XMM24,%XMM18,%XMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VMOVAPS %XMM18,(%R15,%R8,4) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 | vect (25.0%) |
VEXTRACTI32X4 $0x3,%ZMM4,%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 4 | 1 | vect (25.0%) |
VMOVQ %XMM4,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 | scal (12.5%) |
VMOVAPS (%R15,%R8,4),%XMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VSUBPS %XMM22,%XMM18,%XMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VMOVAPS %XMM18,(%R15,%R8,4) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 | vect (25.0%) |
VPEXTRQ $0x1,%XMM4,%R8 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 6 | 1 | scal (12.5%) |
VMOVAPS (%R15,%R8,4),%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VSUBPS %XMM1,%XMM4,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VPERMD %ZMM3,%ZMM16,%ZMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 5 | 1 | vect (100.0%) |
VMOVAPS %XMM1,(%R15,%R8,4) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 | vect (25.0%) |
VPMOVSXDQ %YMM3,%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 4 | 1 | vect (50.0%) |
VMOVQ %XMM1,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 | scal (12.5%) |
VMOVAPS (%R15,%R8,4),%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VSUBPS %XMM20,%XMM3,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VMOVAPS %XMM3,(%R15,%R8,4) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 | vect (25.0%) |
VPEXTRQ $0x1,%XMM1,%R8 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 6 | 1 | scal (12.5%) |
VMOVAPS (%R15,%R8,4),%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VSUBPS %XMM0,%XMM3,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VMOVAPS %XMM0,(%R15,%R8,4) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 | vect (25.0%) |
VEXTRACTI128 $0x1,%YMM1,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | vect (25.0%) |
VMOVQ %XMM0,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 | scal (12.5%) |
VMOVAPS (%R15,%R8,4),%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VSUBPS %XMM29,%XMM3,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VMOVAPS %XMM3,(%R15,%R8,4) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 | vect (25.0%) |
VPEXTRQ $0x1,%XMM0,%R8 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 6 | 1 | scal (12.5%) |
VMOVAPS (%R15,%R8,4),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VSUBPS %XMM27,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VMOVAPS %XMM0,(%R15,%R8,4) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 | vect (25.0%) |
VEXTRACTI32X4 $0x2,%ZMM1,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 4 | 1 | vect (25.0%) |
VMOVQ %XMM0,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 | scal (12.5%) |
VMOVAPS (%R15,%R8,4),%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VSUBPS %XMM19,%XMM3,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VMOVAPS %XMM3,(%R15,%R8,4) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 | vect (25.0%) |
VPEXTRQ $0x1,%XMM0,%R8 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 6 | 1 | scal (12.5%) |
VMOVAPS (%R15,%R8,4),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VSUBPS %XMM25,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VMOVAPS %XMM0,(%R15,%R8,4) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 | vect (25.0%) |
VEXTRACTI32X4 $0x3,%ZMM1,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 4 | 1 | vect (25.0%) |
VMOVQ %XMM0,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 | scal (12.5%) |
VMOVAPS (%R15,%R8,4),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VSUBPS %XMM23,%XMM1,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VMOVAPS %XMM1,(%R15,%R8,4) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 | vect (25.0%) |
VPEXTRQ $0x1,%XMM0,%R8 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 6 | 1 | scal (12.5%) |
VMOVAPS (%R15,%R8,4),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VSUBPS %XMM2,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VMOVAPS %XMM0,(%R15,%R8,4) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 | vect (25.0%) |
ADD $0x30,%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
CMP %RSI,%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
JAE bedab0 <_Z8do_pairsiiPKiPK9t_iparamsPA3_KfPA4_fPA3_fPK5t_pbcPS4_PfN3gmx8ArrayRefIS4_EESI_NSH_IKbEENSH_IKtEEiPK10t_forcerecbRKNSG_12StepWorkloadEP17gmx_grppairener_tPi+0x1d20> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV %RDI,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
XOR %R9D,%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | scal (6.3%) |
MOV %EDI,%R10D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | scal (6.3%) |
JMP bec7f1 <_Z8do_pairsiiPKiPK9t_iparamsPA3_KfPA4_fPA3_fPK5t_pbcPS4_PfN3gmx8ArrayRefIS4_EESI_NSH_IKbEENSH_IKtEEiPK10t_forcerecbRKNSG_12StepWorkloadEP17gmx_grppairener_tPi+0xa61> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
JMP bec210 <_Z8do_pairsiiPKiPK9t_iparamsPA3_KfPA4_fPA3_fPK5t_pbcPS4_PfN3gmx8ArrayRefIS4_EESI_NSH_IKbEENSH_IKtEEiPK10t_forcerecbRKNSG_12StepWorkloadEP17gmx_grppairener_tPi+0x480> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
Function | do_pairs(int, int, int const*, t_iparams const*, float const (*) [3], float (*) [4], float (*) [3], t_pbc const*, float const*, float*, gmx::ArrayRef |
Source file and lines | pairs.cpp:873-911 |
Module | libgromacs_mpi.so.9.0.0 |
nb instructions | 229 |
nb uops | 725 |
loop length | 1407 |
used x86 registers | 10 |
used mmx registers | 0 |
used xmm registers | 19 |
used ymm registers | 9 |
used zmm registers | 26 |
nb stack references | 11 |
ADD-SUB / MUL ratio | 1.65 |
micro-operation queue | 120.83 cycles |
front end | 120.83 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 1.00 | 0.75 | 0.75 | 0.50 | 1.00 | 25.00 | 25.00 | 25.00 | 71.50 | 71.58 | 71.58 | 71.33 | 86.00 | 86.00 |
cycles | 1.00 | 0.75 | 0.75 | 0.50 | 1.00 | 28.33 | 28.33 | 28.33 | 89.00 | 83.08 | 77.08 | 76.83 | 86.00 | 86.00 |
Cycles executing div or sqrt instructions | NA |
Longest recurrence chain latency (RecMII) | 1.00 |
Front-end | 120.83 |
Dispatch | 89.00 |
Data deps. | 1.00 |
Overall L1 | 120.83 |
all | 43% |
load | 100% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 100% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 39% |
all | 100% |
load | 100% |
store | 100% |
mul | 100% |
add-sub | 100% |
fma | 100% |
div/sqrt | 100% |
other | 100% |
all | 84% |
load | 100% |
store | 100% |
mul | 100% |
add-sub | 100% |
fma | 100% |
div/sqrt | 100% |
other | 62% |
all | 31% |
load | 100% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 100% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 27% |
all | 50% |
load | 47% |
store | 25% |
mul | 100% |
add-sub | 44% |
fma | 100% |
div/sqrt | 100% |
other | 50% |
all | 45% |
load | 50% |
store | 25% |
mul | 100% |
add-sub | 44% |
fma | 100% |
div/sqrt | 100% |
other | 35% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput | Vectorization |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
VMOVDQA64 0xc0(%RSP),%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VPMULLD %ZMM10,%ZMM0,%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
KXNORW %K0,%K0,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | N/A |
VXORPS %XMM2,%XMM2,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
VGATHERDPS (%R13,%ZMM1,4),%ZMM2{%K1} | 81 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.50 | 6.67 | 7.17 | 5.67 | 9 | 9 | 0-21 | 16.56 | vect (100.0%) |
KXNORW %K0,%K0,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | N/A |
VXORPS %XMM19,%XMM19,%XMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
MOV 0x88(%RSP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VGATHERDPS (%RCX,%ZMM1,4),%ZMM19{%K1} | 81 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.50 | 6.67 | 7.17 | 5.67 | 9 | 9 | 0-21 | 16.56 | vect (100.0%) |
KXNORW %K0,%K0,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | N/A |
VXORPS %XMM20,%XMM20,%XMM20 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
VGATHERDPS (%RDX,%ZMM1,4),%ZMM20{%K1} | 81 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.50 | 6.67 | 7.17 | 5.67 | 9 | 9 | 0-21 | 16.56 | vect (100.0%) |
VMOVDQA64 0x100(%RSP),%ZMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VPMULLD %ZMM10,%ZMM18,%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
KXNORW %K0,%K0,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | N/A |
VXORPS %XMM21,%XMM21,%XMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
VGATHERDPS (%R13,%ZMM1,4),%ZMM21{%K1} | 81 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.50 | 6.67 | 7.17 | 5.67 | 9 | 9 | 0-21 | 16.56 | vect (100.0%) |
KXNORW %K0,%K0,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | N/A |
VXORPS %XMM22,%XMM22,%XMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
VGATHERDPS (%RCX,%ZMM1,4),%ZMM22{%K1} | 81 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.50 | 6.67 | 7.17 | 5.67 | 9 | 9 | 0-21 | 16.56 | vect (100.0%) |
KXNORW %K0,%K0,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | N/A |
VXORPS %XMM23,%XMM23,%XMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
VGATHERDPS (%RDX,%ZMM1,4),%ZMM23{%K1} | 81 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.50 | 6.67 | 7.17 | 5.67 | 9 | 9 | 0-21 | 16.56 | vect (100.0%) |
VSUBPS %ZMM21,%ZMM2,%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VSUBPS %ZMM22,%ZMM19,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS 0x2c0(%RSP),%ZMM12,%ZMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VSUBPS %ZMM23,%ZMM20,%ZMM20 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS 0x340(%RSP),%ZMM20,%ZMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VRNDSCALEPS $0,%ZMM21,%ZMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS 0x180(%RSP),%ZMM21,%ZMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VSUBPS %ZMM22,%ZMM1,%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS 0x140(%RSP),%ZMM21,%ZMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VSUBPS %ZMM22,%ZMM2,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS 0x200(%RSP),%ZMM21,%ZMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM5,%ZMM2,%ZMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VRNDSCALEPS $0,%ZMM22,%ZMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VSUBPS %ZMM21,%ZMM20,%ZMM20 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM6,%ZMM22,%ZMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VSUBPS %ZMM21,%ZMM1,%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM7,%ZMM22,%ZMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VSUBPS %ZMM21,%ZMM2,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM8,%ZMM1,%ZMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VRNDSCALEPS $0,%ZMM21,%ZMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM9,%ZMM21,%ZMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VSUBPS %ZMM21,%ZMM1,%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM1,%ZMM1,%ZMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM2,%ZMM2,%ZMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VADDPS %ZMM22,%ZMM21,%ZMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM20,%ZMM20,%ZMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VADDPS %ZMM22,%ZMM21,%ZMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VRSQRT14PS %ZMM21,%ZMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 1 | vect (100.0%) |
VMULPS %ZMM21,%ZMM22,%ZMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM13,%ZMM22,%ZMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VFMADD213PS %ZMM14,%ZMM22,%ZMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VMULPS %ZMM21,%ZMM23,%ZMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM21,%ZMM21,%ZMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM22,%ZMM22,%ZMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM23,%ZMM22,%ZMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMOVAPS 0x240(%RSP),%ZMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS 0x300(%RSP),%ZMM3,%ZMM24 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM21,%ZMM24,%ZMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS 0x280(%RSP),%ZMM15,%ZMM24 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VFMADD231PS %ZMM19,%ZMM23,%ZMM24 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VFMADD213PS %ZMM21,%ZMM23,%ZMM24 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VMULPS %ZMM24,%ZMM22,%ZMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM19,%ZMM1,%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM19,%ZMM2,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM19,%ZMM20,%ZMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VPSLLD $0x2,%ZMM0,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 1 | vect (100.0%) |
VPERMD %ZMM0,%ZMM16,%ZMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 5 | 1 | vect (100.0%) |
VPERMD %ZMM0,%ZMM17,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 5 | 1 | vect (100.0%) |
VUNPCKLPS %ZMM19,%ZMM1,%ZMM20 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.67 | 0.67 | 0.67 | 0 | 0 | 1 | 0.67 | vect (50.0%) |
VUNPCKHPS %ZMM19,%ZMM1,%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.67 | 0.67 | 0.67 | 0 | 0 | 1 | 0.67 | vect (50.0%) |
VUNPCKLPS %ZMM11,%ZMM2,%ZMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.67 | 0.67 | 0.67 | 0 | 0 | 1 | 0.67 | vect (50.0%) |
VUNPCKHPS %ZMM11,%ZMM2,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.67 | 0.67 | 0.67 | 0 | 0 | 1 | 0.67 | vect (50.0%) |
VUNPCKLPS %ZMM19,%ZMM20,%ZMM30 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.67 | 0.67 | 0.67 | 0 | 0 | 1 | 0.67 | vect (50.0%) |
VUNPCKHPS %ZMM19,%ZMM20,%ZMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.67 | 0.67 | 0.67 | 0 | 0 | 1 | 0.67 | vect (50.0%) |
VUNPCKLPS %ZMM2,%ZMM1,%ZMM20 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.67 | 0.67 | 0.67 | 0 | 0 | 1 | 0.67 | vect (50.0%) |
VUNPCKHPS %ZMM2,%ZMM1,%ZMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.67 | 0.67 | 0.67 | 0 | 0 | 1 | 0.67 | vect (50.0%) |
VEXTRACTF32X4 $0x1,%YMM30,%XMM31 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 4 | 0.25 | vect (25.0%) |
VEXTRACTF32X4 $0x2,%ZMM30,%XMM28 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 4 | 1 | vect (25.0%) |
VEXTRACTF32X4 $0x3,%ZMM30,%XMM26 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 4 | 1 | vect (25.0%) |
VEXTRACTF32X4 $0x1,%YMM21,%XMM24 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 4 | 0.25 | vect (25.0%) |
VEXTRACTF32X4 $0x2,%ZMM21,%XMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 4 | 1 | vect (25.0%) |
VPMOVSXDQ %YMM0,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 4 | 1 | vect (50.0%) |
VMOVQ %XMM0,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 | scal (12.5%) |
VADDPS (%R15,%R8,4),%XMM30,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VMOVAPS %XMM1,(%R15,%R8,4) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 | vect (25.0%) |
VPEXTRQ $0x1,%XMM0,%R8 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 6 | 1 | scal (12.5%) |
VADDPS (%R15,%R8,4),%XMM31,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VMOVAPS %XMM1,(%R15,%R8,4) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 | vect (25.0%) |
VEXTRACTI128 $0x1,%YMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | vect (25.0%) |
VMOVQ %XMM1,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 | scal (12.5%) |
VADDPS (%R15,%R8,4),%XMM28,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VMOVAPS %XMM2,(%R15,%R8,4) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 | vect (25.0%) |
VPEXTRQ $0x1,%XMM1,%R8 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 6 | 1 | scal (12.5%) |
VADDPS (%R15,%R8,4),%XMM26,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VMOVAPS %XMM1,(%R15,%R8,4) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 | vect (25.0%) |
VEXTRACTI32X4 $0x2,%ZMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 4 | 1 | vect (25.0%) |
VMOVQ %XMM1,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 | scal (12.5%) |
VADDPS (%R15,%R8,4),%XMM21,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VMOVAPS %XMM2,(%R15,%R8,4) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 | vect (25.0%) |
VPEXTRQ $0x1,%XMM1,%R8 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 6 | 1 | scal (12.5%) |
VADDPS (%R15,%R8,4),%XMM24,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VMOVAPS %XMM1,(%R15,%R8,4) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 | vect (25.0%) |
VEXTRACTI32X4 $0x3,%ZMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 4 | 1 | vect (25.0%) |
VMOVQ %XMM0,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 | scal (12.5%) |
VADDPS (%R15,%R8,4),%XMM22,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VMOVAPS %XMM1,(%R15,%R8,4) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 | vect (25.0%) |
VPEXTRQ $0x1,%XMM0,%R8 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 6 | 1 | scal (12.5%) |
VEXTRACTF32X4 $0x3,%ZMM21,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 4 | 1 | vect (25.0%) |
VADDPS (%R15,%R8,4),%XMM1,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VMOVAPS %XMM0,(%R15,%R8,4) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 | vect (25.0%) |
VEXTRACTF32X4 $0x1,%YMM20,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 4 | 0.25 | vect (25.0%) |
VEXTRACTF32X4 $0x2,%ZMM20,%XMM29 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 4 | 1 | vect (25.0%) |
VEXTRACTF32X4 $0x3,%ZMM20,%XMM27 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 4 | 1 | vect (25.0%) |
VEXTRACTF32X4 $0x1,%YMM19,%XMM25 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 4 | 0.25 | vect (25.0%) |
VEXTRACTF32X4 $0x2,%ZMM19,%XMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 4 | 1 | vect (25.0%) |
VPMOVSXDQ %YMM3,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 4 | 1 | vect (50.0%) |
VMOVQ %XMM2,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 | scal (12.5%) |
VADDPS (%R15,%R8,4),%XMM20,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VMOVAPS %XMM3,(%R15,%R8,4) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 | vect (25.0%) |
VPEXTRQ $0x1,%XMM2,%R8 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 6 | 1 | scal (12.5%) |
VADDPS (%R15,%R8,4),%XMM0,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VMOVAPS %XMM3,(%R15,%R8,4) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 | vect (25.0%) |
VEXTRACTI128 $0x1,%YMM2,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | vect (25.0%) |
VMOVQ %XMM3,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 | scal (12.5%) |
VADDPS (%R15,%R8,4),%XMM29,%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VMOVAPS %XMM4,(%R15,%R8,4) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 | vect (25.0%) |
VPEXTRQ $0x1,%XMM3,%R8 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 6 | 1 | scal (12.5%) |
VADDPS (%R15,%R8,4),%XMM27,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VMOVAPS %XMM3,(%R15,%R8,4) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 | vect (25.0%) |
VEXTRACTI32X4 $0x2,%ZMM2,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 4 | 1 | vect (25.0%) |
VMOVQ %XMM3,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 | scal (12.5%) |
VADDPS (%R15,%R8,4),%XMM19,%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VMOVAPS %XMM4,(%R15,%R8,4) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 | vect (25.0%) |
VPEXTRQ $0x1,%XMM3,%R8 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 6 | 1 | scal (12.5%) |
VADDPS (%R15,%R8,4),%XMM25,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VMOVAPS %XMM3,(%R15,%R8,4) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 | vect (25.0%) |
VEXTRACTI32X4 $0x3,%ZMM2,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 4 | 1 | vect (25.0%) |
VMOVQ %XMM2,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 | scal (12.5%) |
VADDPS (%R15,%R8,4),%XMM23,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VMOVAPS %XMM3,(%R15,%R8,4) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 | vect (25.0%) |
VPEXTRQ $0x1,%XMM2,%R8 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 6 | 1 | scal (12.5%) |
VEXTRACTF32X4 $0x3,%ZMM19,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 4 | 1 | vect (25.0%) |
VADDPS (%R15,%R8,4),%XMM2,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VMOVAPS %XMM3,(%R15,%R8,4) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 | vect (25.0%) |
VPSLLD $0x2,%ZMM18,%ZMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 1 | vect (100.0%) |
VPERMD %ZMM3,%ZMM17,%ZMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 5 | 1 | vect (100.0%) |
VPMOVSXDQ %YMM4,%ZMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 4 | 1 | vect (50.0%) |
VMOVQ %XMM4,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 | scal (12.5%) |
VMOVAPS (%R15,%R8,4),%XMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VSUBPS %XMM30,%XMM18,%XMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VMOVAPS %XMM18,(%R15,%R8,4) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 | vect (25.0%) |
VPEXTRQ $0x1,%XMM4,%R8 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 6 | 1 | scal (12.5%) |
VMOVAPS (%R15,%R8,4),%XMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VSUBPS %XMM31,%XMM18,%XMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VMOVAPS %XMM18,(%R15,%R8,4) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 | vect (25.0%) |
VEXTRACTI32X4 $0x1,%YMM4,%XMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 4 | 0.25 | vect (25.0%) |
VMOVQ %XMM18,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 | scal (12.5%) |
VMOVAPS (%R15,%R8,4),%XMM30 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VSUBPS %XMM28,%XMM30,%XMM28 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VMOVAPS %XMM28,(%R15,%R8,4) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 | vect (25.0%) |
VPEXTRQ $0x1,%XMM18,%R8 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 6 | 1 | scal (12.5%) |
VMOVAPS (%R15,%R8,4),%XMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VSUBPS %XMM26,%XMM18,%XMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VMOVAPS %XMM18,(%R15,%R8,4) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 | vect (25.0%) |
VEXTRACTI32X4 $0x2,%ZMM4,%XMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 4 | 1 | vect (25.0%) |
VMOVQ %XMM18,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 | scal (12.5%) |
VMOVAPS (%R15,%R8,4),%XMM26 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VSUBPS %XMM21,%XMM26,%XMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VMOVAPS %XMM21,(%R15,%R8,4) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 | vect (25.0%) |
VPEXTRQ $0x1,%XMM18,%R8 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 6 | 1 | scal (12.5%) |
VMOVAPS (%R15,%R8,4),%XMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VSUBPS %XMM24,%XMM18,%XMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VMOVAPS %XMM18,(%R15,%R8,4) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 | vect (25.0%) |
VEXTRACTI32X4 $0x3,%ZMM4,%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 4 | 1 | vect (25.0%) |
VMOVQ %XMM4,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 | scal (12.5%) |
VMOVAPS (%R15,%R8,4),%XMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VSUBPS %XMM22,%XMM18,%XMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VMOVAPS %XMM18,(%R15,%R8,4) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 | vect (25.0%) |
VPEXTRQ $0x1,%XMM4,%R8 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 6 | 1 | scal (12.5%) |
VMOVAPS (%R15,%R8,4),%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VSUBPS %XMM1,%XMM4,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VPERMD %ZMM3,%ZMM16,%ZMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 5 | 1 | vect (100.0%) |
VMOVAPS %XMM1,(%R15,%R8,4) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 | vect (25.0%) |
VPMOVSXDQ %YMM3,%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 4 | 1 | vect (50.0%) |
VMOVQ %XMM1,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 | scal (12.5%) |
VMOVAPS (%R15,%R8,4),%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VSUBPS %XMM20,%XMM3,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VMOVAPS %XMM3,(%R15,%R8,4) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 | vect (25.0%) |
VPEXTRQ $0x1,%XMM1,%R8 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 6 | 1 | scal (12.5%) |
VMOVAPS (%R15,%R8,4),%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VSUBPS %XMM0,%XMM3,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VMOVAPS %XMM0,(%R15,%R8,4) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 | vect (25.0%) |
VEXTRACTI128 $0x1,%YMM1,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | vect (25.0%) |
VMOVQ %XMM0,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 | scal (12.5%) |
VMOVAPS (%R15,%R8,4),%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VSUBPS %XMM29,%XMM3,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VMOVAPS %XMM3,(%R15,%R8,4) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 | vect (25.0%) |
VPEXTRQ $0x1,%XMM0,%R8 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 6 | 1 | scal (12.5%) |
VMOVAPS (%R15,%R8,4),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VSUBPS %XMM27,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VMOVAPS %XMM0,(%R15,%R8,4) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 | vect (25.0%) |
VEXTRACTI32X4 $0x2,%ZMM1,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 4 | 1 | vect (25.0%) |
VMOVQ %XMM0,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 | scal (12.5%) |
VMOVAPS (%R15,%R8,4),%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VSUBPS %XMM19,%XMM3,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VMOVAPS %XMM3,(%R15,%R8,4) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 | vect (25.0%) |
VPEXTRQ $0x1,%XMM0,%R8 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 6 | 1 | scal (12.5%) |
VMOVAPS (%R15,%R8,4),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VSUBPS %XMM25,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VMOVAPS %XMM0,(%R15,%R8,4) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 | vect (25.0%) |
VEXTRACTI32X4 $0x3,%ZMM1,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 4 | 1 | vect (25.0%) |
VMOVQ %XMM0,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 | scal (12.5%) |
VMOVAPS (%R15,%R8,4),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VSUBPS %XMM23,%XMM1,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VMOVAPS %XMM1,(%R15,%R8,4) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 | vect (25.0%) |
VPEXTRQ $0x1,%XMM0,%R8 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 6 | 1 | scal (12.5%) |
VMOVAPS (%R15,%R8,4),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VSUBPS %XMM2,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VMOVAPS %XMM0,(%R15,%R8,4) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 | vect (25.0%) |
ADD $0x30,%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
CMP %RSI,%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
JAE bedab0 <_Z8do_pairsiiPKiPK9t_iparamsPA3_KfPA4_fPA3_fPK5t_pbcPS4_PfN3gmx8ArrayRefIS4_EESI_NSH_IKbEENSH_IKtEEiPK10t_forcerecbRKNSG_12StepWorkloadEP17gmx_grppairener_tPi+0x1d20> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV %RDI,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
XOR %R9D,%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | scal (6.3%) |
MOV %EDI,%R10D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | scal (6.3%) |
JMP bec7f1 <_Z8do_pairsiiPKiPK9t_iparamsPA3_KfPA4_fPA3_fPK5t_pbcPS4_PfN3gmx8ArrayRefIS4_EESI_NSH_IKbEENSH_IKtEEiPK10t_forcerecbRKNSG_12StepWorkloadEP17gmx_grppairener_tPi+0xa61> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
Run 1x1 | Number processes: 1; Number processes per node: 1; OMP_NUM_THREADS: 1 |
---|---|
Run 2x1 | Number processes: 2; Number processes per node: 2; OMP_NUM_THREADS: 1 |
Run 4x1 | Number processes: 4; Number processes per node: 4; OMP_NUM_THREADS: 1 |
Run 8x1 | Number processes: 8; Number processes per node: 8; OMP_NUM_THREADS: 1 |
Run 16x1 | Number processes: 16; Number processes per node: 16; OMP_NUM_THREADS: 1 |
Run 32x1 | Number processes: 32; Number processes per node: 32; OMP_NUM_THREADS: 1 |
Run 64x1 | Number processes: 64; Number processes per node: 64; OMP_NUM_THREADS: 1 |
Run 128x1 | Number processes: 128; Number processes per node: 128; OMP_NUM_THREADS: 1 |
Run 192x1 | Number processes: 192; Number nodes: 1; Number processes per node: 192; Run Command: <executable> mdrun -s ion_channel.tpr -nsteps 10000 -pin on -deffnm aocc; MPI Command: mpirun -genv I_MPI_FABRICS=shm -n <number_processes>; Dataset: ; Run Directory: .; OMP_NUM_THREADS: 1 |
(1x1) Efficiency | (1x1) Potential Speed-Up (%) | (2x1) Efficiency | (2x1) Potential Speed-Up (%) | (4x1) Efficiency | (4x1) Potential Speed-Up (%) | (8x1) Efficiency | (8x1) Potential Speed-Up (%) | (16x1) Efficiency | (16x1) Potential Speed-Up (%) | (32x1) Efficiency | (32x1) Potential Speed-Up (%) | (64x1) Efficiency | (64x1) Potential Speed-Up (%) | (128x1) Efficiency | (128x1) Potential Speed-Up (%) | (192x1) Efficiency | (192x1) Potential Speed-Up (%) |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1 | 0 | 1.01 | -0 | 1.03 | 0 | 1.02 | 0 | 1.11 | 0 | 1.07 | 0 | 1.1 | 0 | 1.01 | -0 | 1.03 | 0 |
Run | Number of threads | Efficiency (ideal is 1) | Speedup | Ideal Speedup | Time (s) | Coverage (%) |
---|---|---|---|---|---|---|
1x1 | 1 | 1 | 1 | 1 | 4.6549973487854 | 0.55841791629791 |
2x1 | 2 | 1.01 | 2.02 | 2 | 2.6949973106384 | 0.50907808542252 |
4x1 | 4 | 1.03 | 4.11 | 4 | 1.5800017118454 | 0.45434391498566 |
8x1 | 8 | 1.02 | 8.16 | 8 | 0.88999962806702 | 0.36853846907616 |
16x1 | 16 | 1.11 | 17.72 | 16 | 0.51500022411346 | 0.31767144799232 |
32x1 | 20 | 1.07 | 34.09 | 32 | 0.53499972820282 | 0.26572406291962 |
64x1 | 38 | 1.1 | 70.19 | 64 | 0.24500001966953 | 0.16686478257179 |
128x1 | 70 | 1.01 | 128.68 | 128 | 0.19999991357327 | 0.21286487579346 |
192x1 | 102 | 1.03 | 198.38 | 192 | 0.13000001013279 | 0.17420598864555 |