Loop Id: 847 | Module: libgromacs_mpi.so.9.0.0 | Source: impl_x86_avx_512_simd_float.h:70-461 [...] | Coverage: 0.10% |
---|
Loop Id: 847 | Module: libgromacs_mpi.so.9.0.0 | Source: impl_x86_avx_512_simd_float.h:70-461 [...] | Coverage: 0.10% |
---|
0x52d6d0 KANDD %K0,%K4,%K4 |
0x52d6d5 KANDD %K1,%K5,%K5 |
0x52d6da VMOVAPS 0x1c0(%RSP),%ZMM26 [9] |
0x52d6e2 VPTESTNMD %ZMM6,%ZMM6,%K7 |
0x52d6e8 VBROADCASTSS -0x1d13ba(%RIP),%ZMM6 [8] |
0x52d6f2 VPTESTMD %ZMM5,%ZMM5,%K6 |
0x52d6f8 VMAXPS %ZMM6,%ZMM7,%ZMM22 |
0x52d6fe VMAXPS %ZMM6,%ZMM8,%ZMM21 |
0x52d704 VMOVAPS %ZMM22,%ZMM5{%K4}{z} |
0x52d70a VMOVAPS 0x540(%RSP),%ZMM0 [9] |
0x52d712 VMULPS %ZMM5,%ZMM0,%ZMM13 |
0x52d718 VMOVAPS %ZMM21,%ZMM5{%K5}{z} |
0x52d71e VMULPS %ZMM5,%ZMM0,%ZMM12 |
0x52d724 VMULPS %ZMM13,%ZMM13,%ZMM24 |
0x52d72a VBROADCASTSS -0x1d0f48(%RIP),%ZMM28 [8] |
0x52d734 VBROADCASTSS -0x1d10b2(%RIP),%ZMM14 [8] |
0x52d73e VMOVAPS %ZMM14,%ZMM18 |
0x52d744 VFMADD213PS %ZMM28,%ZMM24,%ZMM18 |
0x52d74a VBROADCASTSS -0x1d131c(%RIP),%ZMM31 [8] |
0x52d754 VBROADCASTSS -0x1d1456(%RIP),%ZMM5 [8] |
0x52d75e VMOVAPS %ZMM5,%ZMM6 |
0x52d764 VFMADD213PS %ZMM31,%ZMM24,%ZMM6 |
0x52d76a VBROADCASTSS -0x1d15e8(%RIP),%ZMM27 [8] |
0x52d774 VBROADCASTSS -0x1d14be(%RIP),%ZMM3 [8] |
0x52d77e VBROADCASTSS -0x1d11e0(%RIP),%ZMM20 [8] |
0x52d788 VFMADD213PS %ZMM27,%ZMM24,%ZMM18 |
0x52d78e VMOVAPS %ZMM20,%ZMM23 |
0x52d794 VFMADD213PS %ZMM3,%ZMM24,%ZMM23 |
0x52d79a VBROADCASTSS -0x1d11b8(%RIP),%ZMM16 [8] |
0x52d7a4 VBROADCASTSS -0x1d1046(%RIP),%ZMM7 [8] |
0x52d7ae VMOVAPS %ZMM7,%ZMM8 |
0x52d7b4 VFMADD213PS %ZMM16,%ZMM24,%ZMM8 |
0x52d7ba VBROADCASTSS -0x1d1578(%RIP),%ZMM0 [8] |
0x52d7c4 VFMADD213PS %ZMM0,%ZMM24,%ZMM23 |
0x52d7ca VBROADCASTSS -0x1d15bc(%RIP),%ZMM1 [8] |
0x52d7d4 VFMADD213PS %ZMM1,%ZMM24,%ZMM8 |
0x52d7da VBROADCASTSS -0x1d12fc(%RIP),%ZMM2 [8] |
0x52d7e4 VFMADD213PS %ZMM2,%ZMM24,%ZMM23 |
0x52d7ea VMULPS %ZMM12,%ZMM12,%ZMM9 |
0x52d7f0 VFMADD213PS %ZMM28,%ZMM9,%ZMM14 |
0x52d7f6 VFMADD213PS %ZMM31,%ZMM9,%ZMM5 |
0x52d7fc VFMADD213PS %ZMM3,%ZMM9,%ZMM20 |
0x52d802 VFMADD213PS %ZMM16,%ZMM9,%ZMM7 |
0x52d808 VFMADD213PS %ZMM0,%ZMM9,%ZMM20 |
0x52d80e VFMADD213PS %ZMM1,%ZMM9,%ZMM7 |
0x52d814 VFMADD213PS %ZMM2,%ZMM9,%ZMM20 |
0x52d81a VBROADCASTSS -0x1d1404(%RIP),%ZMM0 [8] |
0x52d824 VBROADCASTSS -0x1d1092(%RIP),%ZMM1 [8] |
0x52d82e VMOVAPS %ZMM1,%ZMM2 |
0x52d834 VBROADCASTSS -0x1d12fa(%RIP),%ZMM3 [8] |
0x52d83e VFMADD213PS %ZMM0,%ZMM24,%ZMM2 |
0x52d844 VMOVAPS %ZMM3,%ZMM16 |
0x52d84a VFMADD213PS %ZMM27,%ZMM24,%ZMM16 |
0x52d850 VBROADCASTSS -0x1d1066(%RIP),%ZMM28 [8] |
0x52d85a VBROADCASTSS -0x1d1428(%RIP),%ZMM31 [8] |
0x52d864 VMOVAPS %ZMM31,%ZMM4 |
0x52d86a VFMADD213PS %ZMM28,%ZMM24,%ZMM4 |
0x52d870 VBROADCASTSS -0x1d1692(%RIP),%ZMM10 [8] |
0x52d87a VBROADCASTSS -0x1d133c(%RIP),%ZMM11 [8] |
0x52d884 VMOVAPS %ZMM11,%ZMM15 |
0x52d88a VFMADD213PS %ZMM10,%ZMM24,%ZMM15 |
0x52d890 VBROADCASTSS -0x1d14d2(%RIP),%ZMM17 [8] |
0x52d89a VFMADD213PS %ZMM17,%ZMM24,%ZMM4 |
0x52d8a0 VBROADCASTSS -0x1d1176(%RIP),%ZMM19 [8] |
0x52d8aa VFMADD213PS %ZMM19,%ZMM24,%ZMM15 |
0x52d8b0 VBROADCASTSS -0x1d1082(%RIP),%ZMM25 [8] |
0x52d8ba VFMADD213PS %ZMM25,%ZMM24,%ZMM4 |
0x52d8c0 VFMADD213PS %ZMM0,%ZMM9,%ZMM1 |
0x52d8c6 VFMADD213PS %ZMM27,%ZMM9,%ZMM14 |
0x52d8cc VFMADD213PS %ZMM27,%ZMM9,%ZMM3 |
0x52d8d2 VFMADD213PS %ZMM28,%ZMM9,%ZMM31 |
0x52d8d8 VFMADD213PS %ZMM10,%ZMM9,%ZMM11 |
0x52d8de VFMADD213PS %ZMM17,%ZMM9,%ZMM31 |
0x52d8e4 VRSQRT14PS %ZMM22,%ZMM0 |
0x52d8ea VFMADD213PS %ZMM19,%ZMM9,%ZMM11 |
0x52d8f0 VBROADCASTSS -0x1d147e(%RIP),%ZMM10 [8] |
0x52d8fa VFMADD213PS %ZMM25,%ZMM9,%ZMM31 |
0x52d900 VBROADCASTSS -0x1d17b6(%RIP),%ZMM9 [8] |
0x52d90a VMULPS %ZMM0,%ZMM22,%ZMM17 |
0x52d910 VFMADD213PS %ZMM9,%ZMM0,%ZMM17 |
0x52d916 VRSQRT14PS %ZMM21,%ZMM19 |
0x52d91c VMULPS %ZMM19,%ZMM21,%ZMM21 |
0x52d922 VFMADD213PS %ZMM9,%ZMM19,%ZMM21 |
0x52d928 VMULPS %ZMM10,%ZMM0,%ZMM0 |
0x52d92e VMULPS %ZMM10,%ZMM19,%ZMM9 |
0x52d934 VMULPS %ZMM17,%ZMM0,%ZMM0{%K4}{z} |
0x52d93a VMULPS %ZMM21,%ZMM9,%ZMM9{%K5}{z} |
0x52d940 VFMADD231PS %ZMM6,%ZMM13,%ZMM18 |
0x52d946 VFMADD231PS %ZMM8,%ZMM13,%ZMM23 |
0x52d94c VFMADD231PS %ZMM5,%ZMM12,%ZMM14 |
0x52d952 VRCP14PS %ZMM18,%ZMM5 |
0x52d958 VFMADD231PS %ZMM7,%ZMM12,%ZMM20 |
0x52d95e VBROADCASTSS -0x1d1428(%RIP),%ZMM6 [8] |
0x52d968 VFMADD231PS %ZMM2,%ZMM13,%ZMM16 |
0x52d96e VRCP14PS %ZMM16,%ZMM2 |
0x52d974 VFNMADD213PS %ZMM6,%ZMM2,%ZMM16 |
0x52d97a VMULPS %ZMM16,%ZMM2,%ZMM2 |
0x52d980 VFMADD231PS %ZMM15,%ZMM13,%ZMM4 |
0x52d986 VRCP14PS %ZMM14,%ZMM7 |
0x52d98c VMULPS %ZMM2,%ZMM4,%ZMM2 |
0x52d992 VFNMADD213PS %ZMM6,%ZMM5,%ZMM18 |
0x52d998 VFMADD231PS %ZMM1,%ZMM12,%ZMM3 |
0x52d99e VRCP14PS %ZMM3,%ZMM1 |
0x52d9a4 VFNMADD213PS %ZMM6,%ZMM7,%ZMM14 |
0x52d9aa VFNMADD213PS %ZMM6,%ZMM1,%ZMM3 |
0x52d9b0 VMULPS %ZMM3,%ZMM1,%ZMM1 |
0x52d9b6 VFMADD231PS %ZMM11,%ZMM12,%ZMM31 |
0x52d9bc VMOVAPS 0x580(%RSP),%ZMM8 [9] |
0x52d9c4 VMULPS %ZMM2,%ZMM8,%ZMM2 |
0x52d9ca VMULPS %ZMM1,%ZMM31,%ZMM1 |
0x52d9d0 VMULPS %ZMM1,%ZMM8,%ZMM1 |
0x52d9d6 VMOVAPS %ZMM29,%ZMM3{%K6}{z} |
0x52d9dc VADDPS %ZMM2,%ZMM3,%ZMM2 |
0x52d9e2 VXORPS %XMM6,%XMM6,%XMM6 |
0x52d9e6 VBLENDMPS %ZMM6,%ZMM29,%ZMM3{%K7} |
0x52d9ec VADDPS %ZMM1,%ZMM3,%ZMM1 |
0x52d9f2 VMULPS %ZMM18,%ZMM5,%ZMM3 |
0x52d9f8 VMULPS %ZMM3,%ZMM23,%ZMM3 |
0x52d9fe VMOVAPS %ZMM0,%ZMM4{%K6}{z} |
0x52da04 VMULPS %ZMM3,%ZMM8,%ZMM3 |
0x52da0a VFMADD213PS %ZMM4,%ZMM13,%ZMM3 |
0x52da10 VSUBPS %ZMM2,%ZMM4,%ZMM2 |
0x52da16 VBROADCASTF64X4 (%R8,%R13,4),%ZMM4 [1] |
0x52da1d VMULPS %ZMM14,%ZMM7,%ZMM5 |
0x52da23 VBLENDMPS %ZMM6,%ZMM9,%ZMM6{%K7} |
0x52da29 VSUBPS %ZMM1,%ZMM6,%ZMM1 |
0x52da2f MOV %R12,%RAX |
0x52da32 SAL $0x6,%R12 |
0x52da36 VMULPS %ZMM5,%ZMM20,%ZMM5 |
0x52da3c VBROADCASTF64X4 (%R14,%R12,1),%ZMM7 [7] |
0x52da43 VADDPS 0x440(%RSP),%ZMM7,%ZMM7 [9] |
0x52da4b VMULPS %ZMM5,%ZMM8,%ZMM5 |
0x52da51 VFMADD213PS %ZMM6,%ZMM12,%ZMM5 |
0x52da57 VMULPS %ZMM7,%ZMM0,%ZMM6 |
0x52da5d VMULPS %ZMM6,%ZMM6,%ZMM6 |
0x52da63 VMULPS %ZMM6,%ZMM6,%ZMM8 |
0x52da69 VMULPS %ZMM8,%ZMM6,%ZMM6{%K6}{z} |
0x52da6f VMULPS %ZMM7,%ZMM7,%ZMM7 |
0x52da75 VMULPS %ZMM7,%ZMM7,%ZMM8 |
0x52da7b VMULPS %ZMM8,%ZMM7,%ZMM7 |
0x52da81 VMULPS 0x5c0(%RSP),%ZMM4,%ZMM8 [9] |
0x52da89 VMULPS %ZMM2,%ZMM8,%ZMM2{%K4}{z} |
0x52da8f VADDPS 0x80(%RSP),%ZMM2,%ZMM2 [9] |
0x52da97 VMOVAPS 0x740(%RSP),%ZMM22 [9] |
0x52da9f VMULPS %ZMM4,%ZMM22,%ZMM4 |
0x52daa5 VMULPS %ZMM1,%ZMM4,%ZMM1{%K5}{z} |
0x52daab VADDPS %ZMM1,%ZMM2,%ZMM1 |
0x52dab1 VMOVAPS %ZMM1,0x80(%RSP) [9] |
0x52dab9 SAL $0x4,%RAX |
0x52dabd OR $0x8,%EAX |
0x52dac0 CLTQ |
0x52dac2 VMULPS %ZMM3,%ZMM8,%ZMM1 |
0x52dac8 VBROADCASTF64X4 (%R14,%RAX,4),%ZMM2 [7] |
0x52dacf VMULPS 0x400(%RSP),%ZMM2,%ZMM2 [9] |
0x52dad7 VMULPS %ZMM5,%ZMM4,%ZMM3 |
0x52dadd VMULPS %ZMM6,%ZMM2,%ZMM4 |
0x52dae3 VMULPS %ZMM7,%ZMM2,%ZMM2 |
0x52dae9 VMULPS %ZMM9,%ZMM9,%ZMM5 |
0x52daef VMULPS %ZMM3,%ZMM5,%ZMM3 |
0x52daf5 VMOVAPS 0x180(%RSP),%ZMM18 [9] |
0x52dafd VMOVAPS %ZMM18,%ZMM5 |
0x52db03 VFMADD213PS %ZMM4,%ZMM2,%ZMM5 |
0x52db09 VMULPS %ZMM4,%ZMM6,%ZMM6 |
0x52db0f VSUBPS %ZMM4,%ZMM6,%ZMM4 |
0x52db15 VADDPS %ZMM4,%ZMM1,%ZMM1 |
0x52db1b VMULPS %ZMM0,%ZMM0,%ZMM0 |
0x52db21 VMULPS %ZMM1,%ZMM0,%ZMM0 |
0x52db27 VMULPS %ZMM2,%ZMM7,%ZMM1 |
0x52db2d VFMADD213PS %ZMM6,%ZMM26,%ZMM1 |
0x52db33 VMULPS -0x1d134d(%RIP){1to16},%ZMM5,%ZMM2 [8] |
0x52db3d KANDW %K6,%K4,%K4 |
0x52db41 VFMADD132PS -0x1d1317(%RIP){1to16},%ZMM2,%ZMM1{%K4}{z} [8] |
0x52db4b VMOVAPS 0x100(%RSP),%ZMM21 [9] |
0x52db53 VADDPS %ZMM1,%ZMM21,%ZMM21 |
0x52db59 VMULPS 0x3c0(%RSP),%ZMM0,%ZMM1 [9] |
0x52db61 VMULPS 0x4c0(%RSP),%ZMM0,%ZMM2 [9] |
0x52db69 VMULPS 0x780(%RSP),%ZMM0,%ZMM0 [9] |
0x52db71 VMULPS %ZMM3,%ZMM30,%ZMM4 |
0x52db77 VMULPS 0x380(%RSP),%ZMM3,%ZMM5 [9] |
0x52db7f VMULPS 0x7c0(%RSP),%ZMM3,%ZMM3 [9] |
0x52db87 VMOVAPS 0x280(%RSP),%ZMM14 [9] |
0x52db8f VADDPS %ZMM1,%ZMM14,%ZMM14 |
0x52db95 VMOVAPS 0x200(%RSP),%ZMM27 [9] |
0x52db9d VADDPS %ZMM4,%ZMM27,%ZMM27 |
0x52dba3 VADDPS %ZMM1,%ZMM4,%ZMM1 |
0x52dba9 VMOVAPS 0xc0(%RSP),%ZMM10 [9] |
0x52dbb1 VADDPS %ZMM2,%ZMM10,%ZMM10 |
0x52dbb7 VMOVAPS 0x2c0(%RSP),%ZMM12 [9] |
0x52dbbf VADDPS %ZMM5,%ZMM12,%ZMM12 |
0x52dbc5 VMOVAPS 0x300(%RSP),%ZMM17 [9] |
0x52dbcd VADDPS %ZMM0,%ZMM17,%ZMM17 |
0x52dbd3 VMOVAPS 0x240(%RSP),%ZMM24 [9] |
0x52dbdb VADDPS %ZMM3,%ZMM24,%ZMM24 |
0x52dbe1 VADDPS %ZMM2,%ZMM5,%ZMM2 |
0x52dbe7 VADDPS %ZMM0,%ZMM3,%ZMM0 |
0x52dbed VEXTRACTF64X4 $0x1,%ZMM1,%YMM3 |
0x52dbf4 VADDPS %YMM3,%YMM1,%YMM1 |
0x52dbf8 VMOVAPS (%R10,%R9,4),%YMM3 [5] |
0x52dbfe VSUBPS %YMM1,%YMM3,%YMM1 |
0x52dc02 VMOVAPS 0x20(%R10,%R9,4),%YMM3 [5] |
0x52dc09 VMOVAPS 0x40(%R10,%R9,4),%YMM4 [5] |
0x52dc10 VMOVAPS %YMM1,(%R10,%R9,4) [5] |
0x52dc16 VEXTRACTF64X4 $0x1,%ZMM2,%YMM1 |
0x52dc1d VADDPS %YMM1,%YMM2,%YMM1 |
0x52dc21 VSUBPS %YMM1,%YMM3,%YMM1 |
0x52dc25 VMOVAPS %YMM1,0x20(%R10,%R9,4) [5] |
0x52dc2c VEXTRACTF64X4 $0x1,%ZMM0,%YMM1 |
0x52dc33 VADDPS %YMM1,%YMM0,%YMM0 |
0x52dc37 VSUBPS %YMM0,%YMM4,%YMM0 |
0x52dc3b VMOVAPS %YMM0,0x40(%R10,%R9,4) [5] |
0x52dc42 INC %RCX |
0x52dc45 MOV 0x140(%RSP),%R12 [9] |
0x52dc4d CMP %RCX,%R12 |
0x52dc50 VMOVAPS 0x500(%RSP),%ZMM19 [9] |
0x52dc58 VMOVAPS 0x700(%RSP),%ZMM23 [9] |
0x52dc60 VMOVAPS 0x6c0(%RSP),%ZMM25 [9] |
0x52dc68 VMOVAPS 0x680(%RSP),%ZMM26 [9] |
0x52dc70 VMOVAPS 0x640(%RSP),%ZMM28 [9] |
0x52dc78 VMOVAPS 0x600(%RSP),%ZMM31 [9] |
0x52dc80 VMOVAPS 0x480(%RSP),%ZMM11 [9] |
0x52dc88 JE 52fb60 |
0x52dc8e MOV 0x80(%RDI),%RAX [3] |
0x52dc95 CMPL $-0x1,0x4(%RAX,%RCX,8) [2] |
0x52dc9a JE 52ea4f |
0x52dca0 MOVSXD (%R15,%RCX,8),%R12 [4] |
0x52dca4 LEA (,%R12,8),%R13 |
0x52dcac VPBROADCASTD 0x4(%R15,%RCX,8),%ZMM0 [4] |
0x52dcb4 LEA (,%R13,2),%R9 |
0x52dcbc ADD %R13,%R9 |
0x52dcbf VPANDD 0x840(%RSP),%ZMM0,%ZMM5 [9] |
0x52dcc7 VPANDD 0x800(%RSP),%ZMM0,%ZMM6 [9] |
0x52dccf VBROADCASTF64X4 (%RSI,%R9,4),%ZMM0 [6] |
0x52dcd6 MOVSXD %R9D,%RAX |
0x52dcd9 VBROADCASTF64X4 0x20(%RSI,%RAX,4),%ZMM2 [6] |
0x52dce1 VBROADCASTF64X4 0x40(%RSI,%RAX,4),%ZMM7 [6] |
0x52dce9 VSUBPS %ZMM0,%ZMM23,%ZMM1 |
0x52dcef VSUBPS %ZMM0,%ZMM25,%ZMM30 |
0x52dcf5 VSUBPS %ZMM2,%ZMM26,%ZMM3 |
0x52dcfb VSUBPS %ZMM2,%ZMM28,%ZMM0 |
0x52dd01 VSUBPS %ZMM7,%ZMM31,%ZMM4 |
0x52dd07 VSUBPS %ZMM7,%ZMM11,%ZMM2 |
0x52dd0d VMULPS %ZMM1,%ZMM1,%ZMM7 |
0x52dd13 VMULPS %ZMM3,%ZMM3,%ZMM8 |
0x52dd19 VADDPS %ZMM8,%ZMM7,%ZMM7 |
0x52dd1f VMULPS %ZMM4,%ZMM4,%ZMM8 |
0x52dd25 VADDPS %ZMM8,%ZMM7,%ZMM7 |
0x52dd2b VMULPS %ZMM30,%ZMM30,%ZMM8 |
0x52dd31 VMULPS %ZMM0,%ZMM0,%ZMM9 |
0x52dd37 VADDPS %ZMM9,%ZMM8,%ZMM8 |
0x52dd3d VMULPS %ZMM2,%ZMM2,%ZMM9 |
0x52dd43 VADDPS %ZMM9,%ZMM8,%ZMM8 |
0x52dd49 VCMPPS $0x1,%ZMM19,%ZMM7,%K4 |
0x52dd50 VCMPPS $0x1,%ZMM19,%ZMM8,%K5 |
0x52dd57 LEA (%R12,%R12,1),%EAX |
0x52dd5b CMP %R11D,%EAX |
0x52dd5e VMOVAPS %ZMM21,0x100(%RSP) [9] |
0x52dd66 VMOVAPS %ZMM17,0x300(%RSP) [9] |
0x52dd6e VMOVAPS %ZMM24,0x240(%RSP) [9] |
0x52dd76 VMOVAPS %ZMM10,0xc0(%RSP) [9] |
0x52dd7e VMOVAPS %ZMM12,0x2c0(%RSP) [9] |
0x52dd86 VMOVAPS %ZMM27,0x200(%RSP) [9] |
0x52dd8e VMOVAPS %ZMM14,0x280(%RSP) [9] |
0x52dd96 VMOVAPS %ZMM1,0x3c0(%RSP) [9] |
0x52dd9e VMOVAPS %ZMM0,0x380(%RSP) [9] |
0x52dda6 VMOVAPS %ZMM3,0x4c0(%RSP) [9] |
0x52ddae VMOVAPS %ZMM2,0x7c0(%RSP) [9] |
0x52ddb6 VMOVAPS %ZMM4,0x780(%RSP) [9] |
0x52ddbe JE 52d6d0 |
0x52ddc4 OR $0x1,%EAX |
0x52ddc7 CMP %R11D,%EAX |
0x52ddca JNE 52d6da |
0x52ddd0 KANDD %K2,%K4,%K4 |
0x52ddd5 KANDD %K3,%K5,%K5 |
0x52ddda JMP 52d6da |
/home/eoseret/gromacs-2024.2/src/gromacs/nbnxm/simd_load_store_functions.h: 109 - 109 |
-------------------------------------------------------------------------------- |
109: return loadDuplicateHsimd(ptr + offset); |
/home/eoseret/gromacs-2024.2/src/gromacs/nbnxm/simd_kernel_inner.h: 63 - 275 |
-------------------------------------------------------------------------------- |
63: const int cj = l_cj[cjind].cj; |
64: |
65: /* Atom indices (of the first atom in the cluster) */ |
66: const int gmx_unused aj = cj * c_jClusterSize; |
67: |
68: const int ajx = |
69: (c_jClusterSize == c_stride ? aj * DIM : (cj >> 1) * DIM * c_stride + (cj & 1) * c_jClusterSize); |
[...] |
225: aj2 = aj * 2; |
[...] |
275: const SimdReal sqrtEpsilonJ = loadJAtomData<kernelLayout>(ljc, aj2 + c_stride); |
/usr/lib/gcc/x86_64-redhat-linux/11/../../../../include/c++/11/bits/stl_vector.h: 1064 - 1064 |
-------------------------------------------------------------------------------- |
1064: return *(this->_M_impl._M_start + __n); |
/home/eoseret/gromacs-2024.2/src/gromacs/nbnxm/simd_diagonal_masker.h: 140 - 144 |
-------------------------------------------------------------------------------- |
140: if (jClusterIndex * 2 == iClusterIndex) |
141: { |
142: boolV = genBoolArr<nR>([&](int i) { return boolV[i] && diagonalMaskVV_[0][i]; }); |
143: } |
144: else if (jClusterIndex * 2 + 1 == iClusterIndex) |
/home/eoseret/gromacs-2024.2/src/gromacs/simd/include/gromacs/simd/impl_x86_avx_512/impl_x86_avx_512_util_float.h: 95 - 370 |
-------------------------------------------------------------------------------- |
95: _mm512_shuffle_f32x4(a.simdInternal_, a.simdInternal_, 0xEE)); |
96: t = _mm256_load_ps(m); |
97: t = _mm256_sub_ps(t, _mm512_castps512_ps256(a.simdInternal_)); |
98: _mm256_store_ps(m, t); |
[...] |
370: return { _mm512_castpd_ps(_mm512_broadcast_f64x4(_mm256_load_pd(reinterpret_cast<const double*>(m)))) }; |
/home/eoseret/gromacs-2024.2/src/gromacs/simd/include/gromacs/simd/impl_x86_avx_512/impl_x86_avx_512_simd_float.h: 70 - 461 |
-------------------------------------------------------------------------------- |
70: SimdFInt32(std::int32_t i) : simdInternal_(_mm512_set1_epi32(i)) {} |
[...] |
181: return { _mm512_add_ps(a.simdInternal_, b.simdInternal_) }; |
182: } |
183: |
184: static inline SimdFloat gmx_simdcall operator-(SimdFloat a, SimdFloat b) |
185: { |
186: return { _mm512_sub_ps(a.simdInternal_, b.simdInternal_) }; |
[...] |
197: return { _mm512_mul_ps(a.simdInternal_, b.simdInternal_) }; |
198: } |
199: |
200: static inline SimdFloat gmx_simdcall fma(SimdFloat a, SimdFloat b, SimdFloat c) |
201: { |
202: return { _mm512_fmadd_ps(a.simdInternal_, b.simdInternal_, c.simdInternal_) }; |
203: } |
204: |
205: static inline SimdFloat gmx_simdcall fms(SimdFloat a, SimdFloat b, SimdFloat c) |
206: { |
207: return { _mm512_fmsub_ps(a.simdInternal_, b.simdInternal_, c.simdInternal_) }; |
208: } |
209: |
210: static inline SimdFloat gmx_simdcall fnma(SimdFloat a, SimdFloat b, SimdFloat c) |
211: { |
212: return { _mm512_fnmadd_ps(a.simdInternal_, b.simdInternal_, c.simdInternal_) }; |
[...] |
224: return { _mm512_rsqrt14_ps(x.simdInternal_) }; |
225: } |
226: |
227: static inline SimdFloat gmx_simdcall rcp(SimdFloat x) |
228: { |
229: return { _mm512_rcp14_ps(x.simdInternal_) }; |
[...] |
269: return { _mm512_max_ps(a.simdInternal_, b.simdInternal_) }; |
[...] |
367: return { _mm512_cmp_ps_mask(a.simdInternal_, b.simdInternal_, _CMP_LT_OQ) }; |
[...] |
383: return { _mm512_kand(a.simdInternal_, b.simdInternal_) }; |
[...] |
398: return { _mm512_mask_mov_ps(_mm512_setzero_ps(), m.simdInternal_, a.simdInternal_) }; |
[...] |
461: return { _mm512_test_epi32_mask(a.simdInternal_, a.simdInternal_) }; |
/home/eoseret/gromacs-2024.2/src/gromacs/nbnxm/simd_kernel.h: 468 - 471 |
-------------------------------------------------------------------------------- |
468: while (cjind < cjind1 && nbl->cj.excl(cjind) != NBNXN_INTERACTION_MASK_ALL) |
469: { |
470: #include "simd_kernel_inner.h" |
471: cjind++; |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►100.00+ | .omp_outlined.#0x5f8c30 | kerneldispatch.cpp:305 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | nonbonded_verlet_t::dispatchNo[...] | kerneldispatch.cpp:272 | libgromacs_mpi.so.9.0.0 |
○ | do_nb_verlet(t_forcerec*, inte[...] | sim_util.cpp:455 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:1936 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►98.62+ | .omp_outlined.#0x5f8c30 | kerneldispatch.cpp:305 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | nonbonded_verlet_t::dispatchNo[...] | kerneldispatch.cpp:272 | libgromacs_mpi.so.9.0.0 |
○ | do_nb_verlet(t_forcerec*, inte[...] | sim_util.cpp:455 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:1936 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 | |
►1.38+ | .omp_outlined.#0x5f8c30 | kerneldispatch.cpp:305 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | nonbonded_verlet_t::dispatchNo[...] | kerneldispatch.cpp:272 | libgromacs_mpi.so.9.0.0 |
○ | do_nb_verlet(t_forcerec*, inte[...] | sim_util.cpp:455 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:1974 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►97.69+ | .omp_outlined.#0x5f8c30 | kerneldispatch.cpp:305 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | nonbonded_verlet_t::dispatchNo[...] | kerneldispatch.cpp:272 | libgromacs_mpi.so.9.0.0 |
○ | do_nb_verlet(t_forcerec*, inte[...] | sim_util.cpp:455 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:1936 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 | |
►2.31+ | .omp_outlined.#0x5f8c30 | kerneldispatch.cpp:305 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | nonbonded_verlet_t::dispatchNo[...] | kerneldispatch.cpp:272 | libgromacs_mpi.so.9.0.0 |
○ | do_nb_verlet(t_forcerec*, inte[...] | sim_util.cpp:455 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:1974 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►73.75+ | .omp_outlined.#0x5f8c30 | kerneldispatch.cpp:305 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | nonbonded_verlet_t::dispatchNo[...] | kerneldispatch.cpp:272 | libgromacs_mpi.so.9.0.0 |
○ | do_nb_verlet(t_forcerec*, inte[...] | sim_util.cpp:455 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:1936 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 | |
►10.62+ | .omp_outlined.#0x5f8c30 | kerneldispatch.cpp:305 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | nonbonded_verlet_t::dispatchNo[...] | kerneldispatch.cpp:272 | libgromacs_mpi.so.9.0.0 |
○ | do_nb_verlet(t_forcerec*, inte[...] | sim_util.cpp:455 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:1936 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
►8.69+ | .omp_outlined.#0x5f8c30 | kerneldispatch.cpp:305 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | nonbonded_verlet_t::dispatchNo[...] | kerneldispatch.cpp:272 | libgromacs_mpi.so.9.0.0 |
○ | do_nb_verlet(t_forcerec*, inte[...] | sim_util.cpp:455 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:1936 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
►5.21+ | .omp_outlined.#0x5f8c30 | kerneldispatch.cpp:305 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | nonbonded_verlet_t::dispatchNo[...] | kerneldispatch.cpp:272 | libgromacs_mpi.so.9.0.0 |
○ | do_nb_verlet(t_forcerec*, inte[...] | sim_util.cpp:455 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:1974 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►77.95+ | .omp_outlined.#0x5f8c30 | kerneldispatch.cpp:305 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | nonbonded_verlet_t::dispatchNo[...] | kerneldispatch.cpp:272 | libgromacs_mpi.so.9.0.0 |
○ | do_nb_verlet(t_forcerec*, inte[...] | sim_util.cpp:455 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:1936 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 | |
►7.74+ | .omp_outlined.#0x5f8c30 | kerneldispatch.cpp:305 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | nonbonded_verlet_t::dispatchNo[...] | kerneldispatch.cpp:272 | libgromacs_mpi.so.9.0.0 |
○ | do_nb_verlet(t_forcerec*, inte[...] | sim_util.cpp:455 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:1936 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
►6.77+ | .omp_outlined.#0x5f8c30 | kerneldispatch.cpp:305 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | nonbonded_verlet_t::dispatchNo[...] | kerneldispatch.cpp:272 | libgromacs_mpi.so.9.0.0 |
○ | do_nb_verlet(t_forcerec*, inte[...] | sim_util.cpp:455 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:1974 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 | |
►5.80+ | .omp_outlined.#0x5f8c30 | kerneldispatch.cpp:305 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | nonbonded_verlet_t::dispatchNo[...] | kerneldispatch.cpp:272 | libgromacs_mpi.so.9.0.0 |
○ | do_nb_verlet(t_forcerec*, inte[...] | sim_util.cpp:455 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:1936 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►73.45+ | .omp_outlined.#0x5f8c30 | kerneldispatch.cpp:305 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | nonbonded_verlet_t::dispatchNo[...] | kerneldispatch.cpp:272 | libgromacs_mpi.so.9.0.0 |
○ | do_nb_verlet(t_forcerec*, inte[...] | sim_util.cpp:455 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:1936 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 | |
►12.24+ | .omp_outlined.#0x5f8c30 | kerneldispatch.cpp:305 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | nonbonded_verlet_t::dispatchNo[...] | kerneldispatch.cpp:272 | libgromacs_mpi.so.9.0.0 |
○ | do_nb_verlet(t_forcerec*, inte[...] | sim_util.cpp:455 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:1936 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
►6.97+ | .omp_outlined.#0x5f8c30 | kerneldispatch.cpp:305 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | nonbonded_verlet_t::dispatchNo[...] | kerneldispatch.cpp:272 | libgromacs_mpi.so.9.0.0 |
○ | do_nb_verlet(t_forcerec*, inte[...] | sim_util.cpp:455 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:1936 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
►6.03+ | .omp_outlined.#0x5f8c30 | kerneldispatch.cpp:305 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | nonbonded_verlet_t::dispatchNo[...] | kerneldispatch.cpp:272 | libgromacs_mpi.so.9.0.0 |
○ | do_nb_verlet(t_forcerec*, inte[...] | sim_util.cpp:455 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:1974 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 | |
►1.13+ | .omp_outlined.#0x5f8c30 | kerneldispatch.cpp:305 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | nonbonded_verlet_t::dispatchNo[...] | kerneldispatch.cpp:272 | libgromacs_mpi.so.9.0.0 |
○ | do_nb_verlet(t_forcerec*, inte[...] | sim_util.cpp:455 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:1974 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►85.08+ | .omp_outlined.#0x5f8c30 | kerneldispatch.cpp:305 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | nonbonded_verlet_t::dispatchNo[...] | kerneldispatch.cpp:272 | libgromacs_mpi.so.9.0.0 |
○ | do_nb_verlet(t_forcerec*, inte[...] | sim_util.cpp:455 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:1936 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 | |
►12.35+ | .omp_outlined.#0x5f8c30 | kerneldispatch.cpp:305 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | nonbonded_verlet_t::dispatchNo[...] | kerneldispatch.cpp:272 | libgromacs_mpi.so.9.0.0 |
○ | do_nb_verlet(t_forcerec*, inte[...] | sim_util.cpp:455 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:1974 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 | |
►2.40+ | .omp_outlined.#0x5f8c30 | kerneldispatch.cpp:305 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | nonbonded_verlet_t::dispatchNo[...] | kerneldispatch.cpp:272 | libgromacs_mpi.so.9.0.0 |
○ | do_nb_verlet(t_forcerec*, inte[...] | sim_util.cpp:455 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:1936 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►72.07+ | .omp_outlined.#0x5f8c30 | kerneldispatch.cpp:305 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | nonbonded_verlet_t::dispatchNo[...] | kerneldispatch.cpp:272 | libgromacs_mpi.so.9.0.0 |
○ | do_nb_verlet(t_forcerec*, inte[...] | sim_util.cpp:455 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:1936 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 | |
►11.50+ | .omp_outlined.#0x5f8c30 | kerneldispatch.cpp:305 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | nonbonded_verlet_t::dispatchNo[...] | kerneldispatch.cpp:272 | libgromacs_mpi.so.9.0.0 |
○ | do_nb_verlet(t_forcerec*, inte[...] | sim_util.cpp:455 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:1974 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 | |
►7.80+ | .omp_outlined.#0x5f8c30 | kerneldispatch.cpp:305 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | nonbonded_verlet_t::dispatchNo[...] | kerneldispatch.cpp:272 | libgromacs_mpi.so.9.0.0 |
○ | do_nb_verlet(t_forcerec*, inte[...] | sim_util.cpp:455 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:1936 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
►3.49+ | .omp_outlined.#0x5f8c30 | kerneldispatch.cpp:305 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | nonbonded_verlet_t::dispatchNo[...] | kerneldispatch.cpp:272 | libgromacs_mpi.so.9.0.0 |
○ | do_nb_verlet(t_forcerec*, inte[...] | sim_util.cpp:455 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:1936 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
►2.87+ | .omp_outlined.#0x5f8c30 | kerneldispatch.cpp:305 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | nonbonded_verlet_t::dispatchNo[...] | kerneldispatch.cpp:272 | libgromacs_mpi.so.9.0.0 |
○ | do_nb_verlet(t_forcerec*, inte[...] | sim_util.cpp:455 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:1936 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►84.93+ | .omp_outlined.#0x5f8c30 | kerneldispatch.cpp:305 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | nonbonded_verlet_t::dispatchNo[...] | kerneldispatch.cpp:272 | libgromacs_mpi.so.9.0.0 |
○ | do_nb_verlet(t_forcerec*, inte[...] | sim_util.cpp:455 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:1936 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 | |
►15.07+ | .omp_outlined.#0x5f8c30 | kerneldispatch.cpp:305 | libgromacs_mpi.so.9.0.0 |
○ | __kmp_invoke_microtask | libomp.so | |
○ | __kmp_fork_call | libomp.so | |
○ | __kmpc_fork_call | libomp.so | |
○ | nonbonded_verlet_t::dispatchNo[...] | kerneldispatch.cpp:272 | libgromacs_mpi.so.9.0.0 |
○ | do_nb_verlet(t_forcerec*, inte[...] | sim_util.cpp:455 | libgromacs_mpi.so.9.0.0 |
○ | do_force(_IO_FILE*, t_commrec [...] | sim_util.cpp:1974 | libgromacs_mpi.so.9.0.0 |
○ | gmx::LegacySimulator::do_md() | md.cpp:1248 | libgromacs_mpi.so.9.0.0 |
○ | gmx::Mdrunner::mdrunner() | runner.cpp:2311 | libgromacs_mpi.so.9.0.0 |
○ | gmx::gmx_mdrun(int, gmx_hw_inf[...] | mdrun.cpp:280 | gmx_mpi |
○ | gmx::gmx_mdrun(int, char**) | mdrun.cpp:82 | gmx_mpi |
○ | gmx::CommandLineModuleManager:[...] | cmdlinemodulemanager.cpp:569 | libgromacs_mpi.so.9.0.0 |
○ | main | gmx.cpp:58 | gmx_mpi |
○ | __libc_start_call_main | libc.so.6 |
Path / |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.00 |
CQA speedup if FP arith vectorized | 1.00 |
CQA speedup if fully vectorized | 1.06 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.06 |
Bottlenecks | |
Function | void gmx::nbnxmKernelSimd<(KernelLayout)1, (gmx::KernelCoulombType)1, (VdwCutoffCheck)0, (LJCombinationRule)1, (InteractionModifiers)1, (LJEwald)0, (EnergyOutput)1>(NbnxnPairlistCpu const*, nbnxn_atomdata_t const*, interaction_const_t const*, float const (*) [3], nbnxn_atomdata_output_t*) |
Source | simd_load_store_functions.h:109-109,simd_kernel_inner.h:63-69,simd_kernel_inner.h:225-225,simd_kernel_inner.h:275-275,stl_vector.h:1064-1064,simd_diagonal_masker.h:140-144,impl_x86_avx_512_util_float.h:95-98,impl_x86_avx_512_util_float.h:370-370,impl_x86_avx_512_simd_float.h:70-70,impl_x86_avx_512_simd_float.h:181-186,impl_x86_avx_512_simd_float.h:197-212,impl_x86_avx_512_simd_float.h:224-229,impl_x86_avx_512_simd_float.h:269-269,impl_x86_avx_512_simd_float.h:367-367,impl_x86_avx_512_simd_float.h:383-383,impl_x86_avx_512_simd_float.h:398-398,impl_x86_avx_512_simd_float.h:461-461,simd_kernel.h:468-471 |
Source loop unroll info | multi-versioned |
Source loop unroll confidence level | high |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 117.83 |
CQA cycles if no scalar integer | 117.50 |
CQA cycles if FP arith vectorized | 117.83 |
CQA cycles if fully vectorized | 111.02 |
Front-end cycles | 46.89 |
P0 cycles | 4.08 |
P1 cycles | 4.08 |
P2 cycles | 3.92 |
P3 cycles | 3.92 |
P4 cycles | 2.00 |
P5 cycles | 40.00 |
P6 cycles | 40.00 |
P7 cycles | 40.00 |
P8 cycles | 117.83 |
P9 cycles | 111.50 |
P10 cycles | 39.00 |
P11 cycles | 61.00 |
P12 cycles | 14.50 |
P13 cycles | 14.50 |
DIV/SQRT cycles | 0.00 |
Inter-iter dependencies cycles | 0 |
FE+BE cycles (UFS) | NA |
Stall cycles (UFS) | NA |
Nb insns | 269.67 |
Nb uops | 281.33 |
Nb loads | 72.00 |
Nb stores | 16.00 |
Nb stack references | 31.00 |
FLOP/cycle | 26.21 |
Nb FLOP add-sub | 512.00 |
Nb FLOP mul | 912.00 |
Nb FLOP fma | 784.00 |
Nb FLOP div | 0.00 |
Nb FLOP rcp | 64.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 32.00 |
Bytes/cycle | 27.80 |
Bytes prefetched | 0.00 |
Bytes loaded | 2348.00 |
Bytes stored | 928.00 |
Stride 0 | 3.00 |
Stride 1 | 1.00 |
Stride n | 0.00 |
Stride unknown | 7.00 |
Stride indirect | 0.00 |
Vectorization ratio all | 87.67 |
Vectorization ratio load | 59.42 |
Vectorization ratio store | 100.00 |
Vectorization ratio mul | 100.00 |
Vectorization ratio add_sub | 100.00 |
Vectorization ratio fma | 100.00 |
Vectorization ratio div_sqrt | 100.00 |
Vectorization ratio other | 51.84 |
Vector-efficiency ratio all | 83.96 |
Vector-efficiency ratio load | 55.43 |
Vector-efficiency ratio store | 90.63 |
Vector-efficiency ratio mul | 100.00 |
Vector-efficiency ratio add_sub | 91.43 |
Vector-efficiency ratio fma | 100.00 |
Vector-efficiency ratio div_sqrt | 100.00 |
Vector-efficiency ratio other | 46.80 |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.01 |
CQA speedup if FP arith vectorized | 1.00 |
CQA speedup if fully vectorized | 1.07 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.06 |
Bottlenecks | P8, |
Function | void gmx::nbnxmKernelSimd<(KernelLayout)1, (gmx::KernelCoulombType)1, (VdwCutoffCheck)0, (LJCombinationRule)1, (InteractionModifiers)1, (LJEwald)0, (EnergyOutput)1>(NbnxnPairlistCpu const*, nbnxn_atomdata_t const*, interaction_const_t const*, float const (*) [3], nbnxn_atomdata_output_t*) |
Source | simd_load_store_functions.h:109-109,simd_kernel_inner.h:63-69,simd_kernel_inner.h:225-225,simd_kernel_inner.h:275-275,stl_vector.h:1064-1064,simd_diagonal_masker.h:140-144,impl_x86_avx_512_util_float.h:95-98,impl_x86_avx_512_util_float.h:370-370,impl_x86_avx_512_simd_float.h:70-70,impl_x86_avx_512_simd_float.h:181-186,impl_x86_avx_512_simd_float.h:197-212,impl_x86_avx_512_simd_float.h:224-229,impl_x86_avx_512_simd_float.h:269-269,impl_x86_avx_512_simd_float.h:367-367,impl_x86_avx_512_simd_float.h:383-383,impl_x86_avx_512_simd_float.h:398-398,impl_x86_avx_512_simd_float.h:461-461,simd_kernel.h:468-471 |
Source loop unroll info | multi-versioned |
Source loop unroll confidence level | high |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 118.50 |
CQA cycles if no scalar integer | 117.50 |
CQA cycles if FP arith vectorized | 118.50 |
CQA cycles if fully vectorized | 111.06 |
Front-end cycles | 46.67 |
P0 cycles | 3.75 |
P1 cycles | 3.75 |
P2 cycles | 3.50 |
P3 cycles | 3.50 |
P4 cycles | 1.50 |
P5 cycles | 40.00 |
P6 cycles | 40.00 |
P7 cycles | 40.00 |
P8 cycles | 118.50 |
P9 cycles | 111.50 |
P10 cycles | 39.00 |
P11 cycles | 61.00 |
P12 cycles | 14.50 |
P13 cycles | 14.50 |
DIV/SQRT cycles | 0.00 |
Inter-iter dependencies cycles | 0 |
FE+BE cycles (UFS) | NA |
Stall cycles (UFS) | NA |
Nb insns | 268.00 |
Nb uops | 280.00 |
Nb loads | 72.00 |
Nb stores | 16.00 |
Nb stack references | 31.00 |
FLOP/cycle | 26.06 |
Nb FLOP add-sub | 512.00 |
Nb FLOP mul | 912.00 |
Nb FLOP fma | 784.00 |
Nb FLOP div | 0.00 |
Nb FLOP rcp | 64.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 32.00 |
Bytes/cycle | 27.65 |
Bytes prefetched | 0.00 |
Bytes loaded | 2348.00 |
Bytes stored | 928.00 |
Stride 0 | 3.00 |
Stride 1 | 1.00 |
Stride n | 0.00 |
Stride unknown | 7.00 |
Stride indirect | 0.00 |
Vectorization ratio all | 87.90 |
Vectorization ratio load | 59.42 |
Vectorization ratio store | 100.00 |
Vectorization ratio mul | 100.00 |
Vectorization ratio add_sub | 100.00 |
Vectorization ratio fma | 100.00 |
Vectorization ratio div_sqrt | 100.00 |
Vectorization ratio other | 52.38 |
Vector-efficiency ratio all | 84.17 |
Vector-efficiency ratio load | 55.43 |
Vector-efficiency ratio store | 90.63 |
Vector-efficiency ratio mul | 100.00 |
Vector-efficiency ratio add_sub | 91.43 |
Vector-efficiency ratio fma | 100.00 |
Vector-efficiency ratio div_sqrt | 100.00 |
Vector-efficiency ratio other | 47.22 |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.00 |
CQA speedup if FP arith vectorized | 1.00 |
CQA speedup if fully vectorized | 1.06 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.05 |
Bottlenecks | P8, |
Function | void gmx::nbnxmKernelSimd<(KernelLayout)1, (gmx::KernelCoulombType)1, (VdwCutoffCheck)0, (LJCombinationRule)1, (InteractionModifiers)1, (LJEwald)0, (EnergyOutput)1>(NbnxnPairlistCpu const*, nbnxn_atomdata_t const*, interaction_const_t const*, float const (*) [3], nbnxn_atomdata_output_t*) |
Source | simd_load_store_functions.h:109-109,simd_kernel_inner.h:63-69,simd_kernel_inner.h:225-225,simd_kernel_inner.h:275-275,stl_vector.h:1064-1064,simd_diagonal_masker.h:140-144,impl_x86_avx_512_util_float.h:95-98,impl_x86_avx_512_util_float.h:370-370,impl_x86_avx_512_simd_float.h:70-70,impl_x86_avx_512_simd_float.h:181-186,impl_x86_avx_512_simd_float.h:197-212,impl_x86_avx_512_simd_float.h:224-229,impl_x86_avx_512_simd_float.h:269-269,impl_x86_avx_512_simd_float.h:367-367,impl_x86_avx_512_simd_float.h:383-383,impl_x86_avx_512_simd_float.h:398-398,impl_x86_avx_512_simd_float.h:461-461,simd_kernel.h:468-471 |
Source loop unroll info | multi-versioned |
Source loop unroll confidence level | high |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 117.50 |
CQA cycles if no scalar integer | 117.50 |
CQA cycles if FP arith vectorized | 117.50 |
CQA cycles if fully vectorized | 111.00 |
Front-end cycles | 47.33 |
P0 cycles | 4.50 |
P1 cycles | 4.50 |
P2 cycles | 4.25 |
P3 cycles | 4.25 |
P4 cycles | 2.50 |
P5 cycles | 40.00 |
P6 cycles | 40.00 |
P7 cycles | 40.00 |
P8 cycles | 117.50 |
P9 cycles | 111.50 |
P10 cycles | 39.50 |
P11 cycles | 61.50 |
P12 cycles | 14.50 |
P13 cycles | 14.50 |
DIV/SQRT cycles | 0.00 |
Inter-iter dependencies cycles | 0 |
FE+BE cycles (UFS) | NA |
Stall cycles (UFS) | NA |
Nb insns | 272.00 |
Nb uops | 284.00 |
Nb loads | 72.00 |
Nb stores | 16.00 |
Nb stack references | 31.00 |
FLOP/cycle | 26.28 |
Nb FLOP add-sub | 512.00 |
Nb FLOP mul | 912.00 |
Nb FLOP fma | 784.00 |
Nb FLOP div | 0.00 |
Nb FLOP rcp | 64.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 32.00 |
Bytes/cycle | 27.88 |
Bytes prefetched | 0.00 |
Bytes loaded | 2348.00 |
Bytes stored | 928.00 |
Stride 0 | 3.00 |
Stride 1 | 1.00 |
Stride n | 0.00 |
Stride unknown | 7.00 |
Stride indirect | 0.00 |
Vectorization ratio all | 87.55 |
Vectorization ratio load | 59.42 |
Vectorization ratio store | 100.00 |
Vectorization ratio mul | 100.00 |
Vectorization ratio add_sub | 100.00 |
Vectorization ratio fma | 100.00 |
Vectorization ratio div_sqrt | 100.00 |
Vectorization ratio other | 51.56 |
Vector-efficiency ratio all | 83.86 |
Vector-efficiency ratio load | 55.43 |
Vector-efficiency ratio store | 90.63 |
Vector-efficiency ratio mul | 100.00 |
Vector-efficiency ratio add_sub | 91.43 |
Vector-efficiency ratio fma | 100.00 |
Vector-efficiency ratio div_sqrt | 100.00 |
Vector-efficiency ratio other | 46.58 |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.00 |
CQA speedup if FP arith vectorized | 1.00 |
CQA speedup if fully vectorized | 1.06 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.05 |
Bottlenecks | P8, |
Function | void gmx::nbnxmKernelSimd<(KernelLayout)1, (gmx::KernelCoulombType)1, (VdwCutoffCheck)0, (LJCombinationRule)1, (InteractionModifiers)1, (LJEwald)0, (EnergyOutput)1>(NbnxnPairlistCpu const*, nbnxn_atomdata_t const*, interaction_const_t const*, float const (*) [3], nbnxn_atomdata_output_t*) |
Source | simd_load_store_functions.h:109-109,simd_kernel_inner.h:63-69,simd_kernel_inner.h:225-225,simd_kernel_inner.h:275-275,stl_vector.h:1064-1064,simd_diagonal_masker.h:140-144,impl_x86_avx_512_util_float.h:95-98,impl_x86_avx_512_util_float.h:370-370,impl_x86_avx_512_simd_float.h:70-70,impl_x86_avx_512_simd_float.h:181-186,impl_x86_avx_512_simd_float.h:197-212,impl_x86_avx_512_simd_float.h:224-229,impl_x86_avx_512_simd_float.h:269-269,impl_x86_avx_512_simd_float.h:367-367,impl_x86_avx_512_simd_float.h:383-383,impl_x86_avx_512_simd_float.h:398-398,impl_x86_avx_512_simd_float.h:461-461,simd_kernel.h:468-471 |
Source loop unroll info | multi-versioned |
Source loop unroll confidence level | high |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 117.50 |
CQA cycles if no scalar integer | 117.50 |
CQA cycles if FP arith vectorized | 117.50 |
CQA cycles if fully vectorized | 111.00 |
Front-end cycles | 46.67 |
P0 cycles | 4.00 |
P1 cycles | 4.00 |
P2 cycles | 4.00 |
P3 cycles | 4.00 |
P4 cycles | 2.00 |
P5 cycles | 40.00 |
P6 cycles | 40.00 |
P7 cycles | 40.00 |
P8 cycles | 117.50 |
P9 cycles | 111.50 |
P10 cycles | 38.50 |
P11 cycles | 60.50 |
P12 cycles | 14.50 |
P13 cycles | 14.50 |
DIV/SQRT cycles | 0.00 |
Inter-iter dependencies cycles | 0 |
FE+BE cycles (UFS) | NA |
Stall cycles (UFS) | NA |
Nb insns | 269.00 |
Nb uops | 280.00 |
Nb loads | 72.00 |
Nb stores | 16.00 |
Nb stack references | 31.00 |
FLOP/cycle | 26.28 |
Nb FLOP add-sub | 512.00 |
Nb FLOP mul | 912.00 |
Nb FLOP fma | 784.00 |
Nb FLOP div | 0.00 |
Nb FLOP rcp | 64.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 32.00 |
Bytes/cycle | 27.88 |
Bytes prefetched | 0.00 |
Bytes loaded | 2348.00 |
Bytes stored | 928.00 |
Stride 0 | 3.00 |
Stride 1 | 1.00 |
Stride n | 0.00 |
Stride unknown | 7.00 |
Stride indirect | 0.00 |
Vectorization ratio all | 87.55 |
Vectorization ratio load | 59.42 |
Vectorization ratio store | 100.00 |
Vectorization ratio mul | 100.00 |
Vectorization ratio add_sub | 100.00 |
Vectorization ratio fma | 100.00 |
Vectorization ratio div_sqrt | 100.00 |
Vectorization ratio other | 51.56 |
Vector-efficiency ratio all | 83.86 |
Vector-efficiency ratio load | 55.43 |
Vector-efficiency ratio store | 90.63 |
Vector-efficiency ratio mul | 100.00 |
Vector-efficiency ratio add_sub | 91.43 |
Vector-efficiency ratio fma | 100.00 |
Vector-efficiency ratio div_sqrt | 100.00 |
Vector-efficiency ratio other | 46.58 |
Path / |
Function | void gmx::nbnxmKernelSimd<(KernelLayout)1, (gmx::KernelCoulombType)1, (VdwCutoffCheck)0, (LJCombinationRule)1, (InteractionModifiers)1, (LJEwald)0, (EnergyOutput)1>(NbnxnPairlistCpu const*, nbnxn_atomdata_t const*, interaction_const_t const*, float const (*) [3], nbnxn_atomdata_output_t*) |
Source file and lines | impl_x86_avx_512_simd_float.h:70-461 |
Module | libgromacs_mpi.so.9.0.0 |
nb instructions | 269.67 |
nb uops | 281.33 |
loop length | 1786.33 |
used x86 registers | 13 |
used mmx registers | 0 |
used xmm registers | 1 |
used ymm registers | 5 |
used zmm registers | 32 |
nb stack references | 31 |
ADD-SUB / MUL ratio | 0.61 |
micro-operation queue | 46.89 cycles |
front end | 46.89 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 4.08 | 4.08 | 3.92 | 3.92 | 2.00 | 29.33 | 29.33 | 29.33 | 61.67 | 61.67 | 35.50 | 35.50 | 14.50 | 14.50 |
cycles | 4.08 | 4.08 | 3.92 | 3.92 | 2.00 | 40.00 | 40.00 | 40.00 | 117.83 | 111.50 | 39.00 | 61.00 | 14.50 | 14.50 |
Cycles executing div or sqrt instructions | NA |
Longest recurrence chain latency (RecMII) | 0.00 |
Front-end | 46.89 |
Dispatch | 117.83 |
Data deps. | 0.00 |
Overall L1 | 117.83 |
all | 46% |
load | 50% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 46% |
all | 89% |
load | 60% |
store | 100% |
mul | 100% |
add-sub | 100% |
fma | 100% |
div/sqrt | 100% |
other | 52% |
all | 87% |
load | 59% |
store | 100% |
mul | 100% |
add-sub | 100% |
fma | 100% |
div/sqrt | 100% |
other | 51% |
all | 51% |
load | 53% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 51% |
all | 85% |
load | 55% |
store | 90% |
mul | 100% |
add-sub | 91% |
fma | 100% |
div/sqrt | 100% |
other | 46% |
all | 83% |
load | 55% |
store | 90% |
mul | 100% |
add-sub | 91% |
fma | 100% |
div/sqrt | 100% |
other | 46% |
Function | void gmx::nbnxmKernelSimd<(KernelLayout)1, (gmx::KernelCoulombType)1, (VdwCutoffCheck)0, (LJCombinationRule)1, (InteractionModifiers)1, (LJEwald)0, (EnergyOutput)1>(NbnxnPairlistCpu const*, nbnxn_atomdata_t const*, interaction_const_t const*, float const (*) [3], nbnxn_atomdata_output_t*) |
Source file and lines | impl_x86_avx_512_simd_float.h:70-461 |
Module | libgromacs_mpi.so.9.0.0 |
nb instructions | 268 |
nb uops | 280 |
loop length | 1780 |
used x86 registers | 13 |
used mmx registers | 0 |
used xmm registers | 1 |
used ymm registers | 5 |
used zmm registers | 32 |
nb stack references | 31 |
ADD-SUB / MUL ratio | 0.61 |
micro-operation queue | 46.67 cycles |
front end | 46.67 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 3.75 | 3.75 | 3.50 | 3.50 | 1.50 | 29.33 | 29.33 | 29.33 | 62.00 | 62.00 | 35.50 | 35.50 | 14.50 | 14.50 |
cycles | 3.75 | 3.75 | 3.50 | 3.50 | 1.50 | 40.00 | 40.00 | 40.00 | 118.50 | 111.50 | 39.00 | 61.00 | 14.50 | 14.50 |
Cycles executing div or sqrt instructions | NA |
Longest recurrence chain latency (RecMII) | 0.00 |
Front-end | 46.67 |
Dispatch | 118.50 |
Data deps. | 0.00 |
Overall L1 | 118.50 |
all | 50% |
load | 50% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 50% |
all | 89% |
load | 60% |
store | 100% |
mul | 100% |
add-sub | 100% |
fma | 100% |
div/sqrt | 100% |
other | 52% |
all | 87% |
load | 59% |
store | 100% |
mul | 100% |
add-sub | 100% |
fma | 100% |
div/sqrt | 100% |
other | 52% |
all | 54% |
load | 53% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 54% |
all | 85% |
load | 55% |
store | 90% |
mul | 100% |
add-sub | 91% |
fma | 100% |
div/sqrt | 100% |
other | 46% |
all | 84% |
load | 55% |
store | 90% |
mul | 100% |
add-sub | 91% |
fma | 100% |
div/sqrt | 100% |
other | 47% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput | Vectorization |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
KANDD %K0,%K4,%K4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | N/A |
KANDD %K1,%K5,%K5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | N/A |
VMOVAPS 0x1c0(%RSP),%ZMM26 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VPTESTNMD %ZMM6,%ZMM6,%K7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 1 | vect (100.0%) |
VBROADCASTSS -0x1d13ba(%RIP),%ZMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 5 | 1 | scal (6.3%) |
VPTESTMD %ZMM5,%ZMM5,%K6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 1 | vect (100.0%) |
VMAXPS %ZMM6,%ZMM7,%ZMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 2 | 1 | vect (100.0%) |
VMAXPS %ZMM6,%ZMM8,%ZMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 2 | 1 | vect (100.0%) |
VMOVAPS %ZMM22,%ZMM5{%K4}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VMOVAPS 0x540(%RSP),%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM5,%ZMM0,%ZMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMOVAPS %ZMM21,%ZMM5{%K5}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VMULPS %ZMM5,%ZMM0,%ZMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM13,%ZMM13,%ZMM24 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VBROADCASTSS -0x1d0f48(%RIP),%ZMM28 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 5 | 1 | scal (6.3%) |
VBROADCASTSS -0x1d10b2(%RIP),%ZMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 5 | 1 | scal (6.3%) |
VMOVAPS %ZMM14,%ZMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VFMADD213PS %ZMM28,%ZMM24,%ZMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VBROADCASTSS -0x1d131c(%RIP),%ZMM31 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 5 | 1 | scal (6.3%) |
VBROADCASTSS -0x1d1456(%RIP),%ZMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 5 | 1 | scal (6.3%) |
VMOVAPS %ZMM5,%ZMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VFMADD213PS %ZMM31,%ZMM24,%ZMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VBROADCASTSS -0x1d15e8(%RIP),%ZMM27 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 5 | 1 | scal (6.3%) |
VBROADCASTSS -0x1d14be(%RIP),%ZMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 5 | 1 | scal (6.3%) |
VBROADCASTSS -0x1d11e0(%RIP),%ZMM20 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 5 | 1 | scal (6.3%) |
VFMADD213PS %ZMM27,%ZMM24,%ZMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VMOVAPS %ZMM20,%ZMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VFMADD213PS %ZMM3,%ZMM24,%ZMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VBROADCASTSS -0x1d11b8(%RIP),%ZMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 5 | 1 | scal (6.3%) |
VBROADCASTSS -0x1d1046(%RIP),%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 5 | 1 | scal (6.3%) |
VMOVAPS %ZMM7,%ZMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VFMADD213PS %ZMM16,%ZMM24,%ZMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VBROADCASTSS -0x1d1578(%RIP),%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 5 | 1 | scal (6.3%) |
VFMADD213PS %ZMM0,%ZMM24,%ZMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VBROADCASTSS -0x1d15bc(%RIP),%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 5 | 1 | scal (6.3%) |
VFMADD213PS %ZMM1,%ZMM24,%ZMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VBROADCASTSS -0x1d12fc(%RIP),%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 5 | 1 | scal (6.3%) |
VFMADD213PS %ZMM2,%ZMM24,%ZMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VMULPS %ZMM12,%ZMM12,%ZMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VFMADD213PS %ZMM28,%ZMM9,%ZMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VFMADD213PS %ZMM31,%ZMM9,%ZMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VFMADD213PS %ZMM3,%ZMM9,%ZMM20 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VFMADD213PS %ZMM16,%ZMM9,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VFMADD213PS %ZMM0,%ZMM9,%ZMM20 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VFMADD213PS %ZMM1,%ZMM9,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VFMADD213PS %ZMM2,%ZMM9,%ZMM20 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VBROADCASTSS -0x1d1404(%RIP),%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 5 | 1 | scal (6.3%) |
VBROADCASTSS -0x1d1092(%RIP),%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 5 | 1 | scal (6.3%) |
VMOVAPS %ZMM1,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VBROADCASTSS -0x1d12fa(%RIP),%ZMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 5 | 1 | scal (6.3%) |
VFMADD213PS %ZMM0,%ZMM24,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VMOVAPS %ZMM3,%ZMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VFMADD213PS %ZMM27,%ZMM24,%ZMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VBROADCASTSS -0x1d1066(%RIP),%ZMM28 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 5 | 1 | scal (6.3%) |
VBROADCASTSS -0x1d1428(%RIP),%ZMM31 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 5 | 1 | scal (6.3%) |
VMOVAPS %ZMM31,%ZMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VFMADD213PS %ZMM28,%ZMM24,%ZMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VBROADCASTSS -0x1d1692(%RIP),%ZMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 5 | 1 | scal (6.3%) |
VBROADCASTSS -0x1d133c(%RIP),%ZMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 5 | 1 | scal (6.3%) |
VMOVAPS %ZMM11,%ZMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VFMADD213PS %ZMM10,%ZMM24,%ZMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VBROADCASTSS -0x1d14d2(%RIP),%ZMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 5 | 1 | scal (6.3%) |
VFMADD213PS %ZMM17,%ZMM24,%ZMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VBROADCASTSS -0x1d1176(%RIP),%ZMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 5 | 1 | scal (6.3%) |
VFMADD213PS %ZMM19,%ZMM24,%ZMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VBROADCASTSS -0x1d1082(%RIP),%ZMM25 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 5 | 1 | scal (6.3%) |
VFMADD213PS %ZMM25,%ZMM24,%ZMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VFMADD213PS %ZMM0,%ZMM9,%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VFMADD213PS %ZMM27,%ZMM9,%ZMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VFMADD213PS %ZMM27,%ZMM9,%ZMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VFMADD213PS %ZMM28,%ZMM9,%ZMM31 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VFMADD213PS %ZMM10,%ZMM9,%ZMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VFMADD213PS %ZMM17,%ZMM9,%ZMM31 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VRSQRT14PS %ZMM22,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 1 | vect (100.0%) |
VFMADD213PS %ZMM19,%ZMM9,%ZMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VBROADCASTSS -0x1d147e(%RIP),%ZMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 5 | 1 | scal (6.3%) |
VFMADD213PS %ZMM25,%ZMM9,%ZMM31 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VBROADCASTSS -0x1d17b6(%RIP),%ZMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 5 | 1 | scal (6.3%) |
VMULPS %ZMM0,%ZMM22,%ZMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VFMADD213PS %ZMM9,%ZMM0,%ZMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VRSQRT14PS %ZMM21,%ZMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 1 | vect (100.0%) |
VMULPS %ZMM19,%ZMM21,%ZMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VFMADD213PS %ZMM9,%ZMM19,%ZMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VMULPS %ZMM10,%ZMM0,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM10,%ZMM19,%ZMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM17,%ZMM0,%ZMM0{%K4}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM21,%ZMM9,%ZMM9{%K5}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VFMADD231PS %ZMM6,%ZMM13,%ZMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VFMADD231PS %ZMM8,%ZMM13,%ZMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VFMADD231PS %ZMM5,%ZMM12,%ZMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VRCP14PS %ZMM18,%ZMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 1 | vect (100.0%) |
VFMADD231PS %ZMM7,%ZMM12,%ZMM20 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VBROADCASTSS -0x1d1428(%RIP),%ZMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 5 | 1 | scal (6.3%) |
VFMADD231PS %ZMM2,%ZMM13,%ZMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VRCP14PS %ZMM16,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 1 | vect (100.0%) |
VFNMADD213PS %ZMM6,%ZMM2,%ZMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VMULPS %ZMM16,%ZMM2,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VFMADD231PS %ZMM15,%ZMM13,%ZMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VRCP14PS %ZMM14,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 1 | vect (100.0%) |
VMULPS %ZMM2,%ZMM4,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VFNMADD213PS %ZMM6,%ZMM5,%ZMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VFMADD231PS %ZMM1,%ZMM12,%ZMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VRCP14PS %ZMM3,%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 1 | vect (100.0%) |
VFNMADD213PS %ZMM6,%ZMM7,%ZMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VFNMADD213PS %ZMM6,%ZMM1,%ZMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VMULPS %ZMM3,%ZMM1,%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VFMADD231PS %ZMM11,%ZMM12,%ZMM31 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VMOVAPS 0x580(%RSP),%ZMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM2,%ZMM8,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM1,%ZMM31,%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM1,%ZMM8,%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMOVAPS %ZMM29,%ZMM3{%K6}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VADDPS %ZMM2,%ZMM3,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VXORPS %XMM6,%XMM6,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
VBLENDMPS %ZMM6,%ZMM29,%ZMM3{%K7} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (100.0%) |
VADDPS %ZMM1,%ZMM3,%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM18,%ZMM5,%ZMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM3,%ZMM23,%ZMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMOVAPS %ZMM0,%ZMM4{%K6}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VMULPS %ZMM3,%ZMM8,%ZMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VFMADD213PS %ZMM4,%ZMM13,%ZMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VSUBPS %ZMM2,%ZMM4,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VBROADCASTF64X4 (%R8,%R13,4),%ZMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 4 | 1 | vect (50.0%) |
VMULPS %ZMM14,%ZMM7,%ZMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VBLENDMPS %ZMM6,%ZMM9,%ZMM6{%K7} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (100.0%) |
VSUBPS %ZMM1,%ZMM6,%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
MOV %R12,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
SAL $0x6,%R12 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
VMULPS %ZMM5,%ZMM20,%ZMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VBROADCASTF64X4 (%R14,%R12,1),%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 4 | 1 | vect (50.0%) |
VADDPS 0x440(%RSP),%ZMM7,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM5,%ZMM8,%ZMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VFMADD213PS %ZMM6,%ZMM12,%ZMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VMULPS %ZMM7,%ZMM0,%ZMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM6,%ZMM6,%ZMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM6,%ZMM6,%ZMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM8,%ZMM6,%ZMM6{%K6}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM7,%ZMM7,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM7,%ZMM7,%ZMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM8,%ZMM7,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS 0x5c0(%RSP),%ZMM4,%ZMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM2,%ZMM8,%ZMM2{%K4}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VADDPS 0x80(%RSP),%ZMM2,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMOVAPS 0x740(%RSP),%ZMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM4,%ZMM22,%ZMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM1,%ZMM4,%ZMM1{%K5}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VADDPS %ZMM1,%ZMM2,%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMOVAPS %ZMM1,0x80(%RSP) | 2 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 1 | 1 | 5 | 2 | vect (100.0%) |
SAL $0x4,%RAX | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
OR $0x8,%EAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
CLTQ | scal (12.5%) | |||||||||||||||||
VMULPS %ZMM3,%ZMM8,%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VBROADCASTF64X4 (%R14,%RAX,4),%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 4 | 1 | vect (50.0%) |
VMULPS 0x400(%RSP),%ZMM2,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM5,%ZMM4,%ZMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM6,%ZMM2,%ZMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM7,%ZMM2,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM9,%ZMM9,%ZMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM3,%ZMM5,%ZMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMOVAPS 0x180(%RSP),%ZMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMOVAPS %ZMM18,%ZMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VFMADD213PS %ZMM4,%ZMM2,%ZMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VMULPS %ZMM4,%ZMM6,%ZMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VSUBPS %ZMM4,%ZMM6,%ZMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VADDPS %ZMM4,%ZMM1,%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM0,%ZMM0,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM1,%ZMM0,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM2,%ZMM7,%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VFMADD213PS %ZMM6,%ZMM26,%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VMULPS -0x1d134d(%RIP){1to16},%ZMM5,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
KANDW %K6,%K4,%K4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | N/A |
VFMADD132PS -0x1d1317(%RIP){1to16},%ZMM2,%ZMM1{%K4}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VMOVAPS 0x100(%RSP),%ZMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VADDPS %ZMM1,%ZMM21,%ZMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS 0x3c0(%RSP),%ZMM0,%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS 0x4c0(%RSP),%ZMM0,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS 0x780(%RSP),%ZMM0,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM3,%ZMM30,%ZMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS 0x380(%RSP),%ZMM3,%ZMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS 0x7c0(%RSP),%ZMM3,%ZMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMOVAPS 0x280(%RSP),%ZMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VADDPS %ZMM1,%ZMM14,%ZMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMOVAPS 0x200(%RSP),%ZMM27 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VADDPS %ZMM4,%ZMM27,%ZMM27 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VADDPS %ZMM1,%ZMM4,%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMOVAPS 0xc0(%RSP),%ZMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VADDPS %ZMM2,%ZMM10,%ZMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMOVAPS 0x2c0(%RSP),%ZMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VADDPS %ZMM5,%ZMM12,%ZMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMOVAPS 0x300(%RSP),%ZMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VADDPS %ZMM0,%ZMM17,%ZMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMOVAPS 0x240(%RSP),%ZMM24 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VADDPS %ZMM3,%ZMM24,%ZMM24 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VADDPS %ZMM2,%ZMM5,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VADDPS %ZMM0,%ZMM3,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VEXTRACTF64X4 $0x1,%ZMM1,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (50.0%) |
VADDPS %YMM3,%YMM1,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
VMOVAPS (%R10,%R9,4),%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
VSUBPS %YMM1,%YMM3,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
VMOVAPS 0x20(%R10,%R9,4),%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
VMOVAPS 0x40(%R10,%R9,4),%YMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
VMOVAPS %YMM1,(%R10,%R9,4) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 | vect (50.0%) |
VEXTRACTF64X4 $0x1,%ZMM2,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (50.0%) |
VADDPS %YMM1,%YMM2,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
VSUBPS %YMM1,%YMM3,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
VMOVAPS %YMM1,0x20(%R10,%R9,4) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 | vect (50.0%) |
VEXTRACTF64X4 $0x1,%ZMM0,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (50.0%) |
VADDPS %YMM1,%YMM0,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
VSUBPS %YMM0,%YMM4,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
VMOVAPS %YMM0,0x40(%R10,%R9,4) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 | vect (50.0%) |
INC %RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV 0x140(%RSP),%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
CMP %RCX,%R12 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
VMOVAPS 0x500(%RSP),%ZMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMOVAPS 0x700(%RSP),%ZMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMOVAPS 0x6c0(%RSP),%ZMM25 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMOVAPS 0x680(%RSP),%ZMM26 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMOVAPS 0x640(%RSP),%ZMM28 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMOVAPS 0x600(%RSP),%ZMM31 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMOVAPS 0x480(%RSP),%ZMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
JE 52fb60 <_ZN3gmx15nbnxmKernelSimdIL12KernelLayout1ELNS_17KernelCoulombTypeE1EL14VdwCutoffCheck0EL17LJCombinationRule1EL20InteractionModifiers1EL7LJEwald0EL12EnergyOutput1EEEvPK16NbnxnPairlistCpuPK16nbnxn_atomdata_tPK19interaction_const_tPA3_KfP23nbnxn_atomdata_output_t+0x29f0> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV 0x80(%RDI),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
CMPL $-0x1,0x4(%RAX,%RCX,8) | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (6.3%) |
JE 52ea4f <_ZN3gmx15nbnxmKernelSimdIL12KernelLayout1ELNS_17KernelCoulombTypeE1EL14VdwCutoffCheck0EL17LJCombinationRule1EL20InteractionModifiers1EL7LJEwald0EL12EnergyOutput1EEEvPK16NbnxnPairlistCpuPK16nbnxn_atomdata_tPK19interaction_const_tPA3_KfP23nbnxn_atomdata_output_t+0x18df> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOVSXD (%R15,%RCX,8),%R12 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
LEA (,%R12,8),%R13 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VPBROADCASTD 0x4(%R15,%RCX,8),%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | 0.50 | scal (6.3%) |
LEA (,%R13,2),%R9 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
ADD %R13,%R9 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VPANDD 0x840(%RSP),%ZMM0,%ZMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 1 | vect (100.0%) |
VPANDD 0x800(%RSP),%ZMM0,%ZMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 1 | vect (100.0%) |
VBROADCASTF64X4 (%RSI,%R9,4),%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 4 | 1 | vect (50.0%) |
MOVSXD %R9D,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VBROADCASTF64X4 0x20(%RSI,%RAX,4),%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 4 | 1 | vect (50.0%) |
VBROADCASTF64X4 0x40(%RSI,%RAX,4),%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 4 | 1 | vect (50.0%) |
VSUBPS %ZMM0,%ZMM23,%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VSUBPS %ZMM0,%ZMM25,%ZMM30 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VSUBPS %ZMM2,%ZMM26,%ZMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VSUBPS %ZMM2,%ZMM28,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VSUBPS %ZMM7,%ZMM31,%ZMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VSUBPS %ZMM7,%ZMM11,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM1,%ZMM1,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM3,%ZMM3,%ZMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VADDPS %ZMM8,%ZMM7,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM4,%ZMM4,%ZMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VADDPS %ZMM8,%ZMM7,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM30,%ZMM30,%ZMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM0,%ZMM0,%ZMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VADDPS %ZMM9,%ZMM8,%ZMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM2,%ZMM2,%ZMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VADDPS %ZMM9,%ZMM8,%ZMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VCMPPS $0x1,%ZMM19,%ZMM7,%K4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 1 | vect (100.0%) |
VCMPPS $0x1,%ZMM19,%ZMM8,%K5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 1 | vect (100.0%) |
LEA (%R12,%R12,1),%EAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
CMP %R11D,%EAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VMOVAPS %ZMM21,0x100(%RSP) | 2 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 1 | 1 | 5 | 2 | vect (100.0%) |
VMOVAPS %ZMM17,0x300(%RSP) | 2 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 1 | 1 | 5 | 2 | vect (100.0%) |
VMOVAPS %ZMM24,0x240(%RSP) | 2 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 1 | 1 | 5 | 2 | vect (100.0%) |
VMOVAPS %ZMM10,0xc0(%RSP) | 2 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 1 | 1 | 5 | 2 | vect (100.0%) |
VMOVAPS %ZMM12,0x2c0(%RSP) | 2 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 1 | 1 | 5 | 2 | vect (100.0%) |
VMOVAPS %ZMM27,0x200(%RSP) | 2 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 1 | 1 | 5 | 2 | vect (100.0%) |
VMOVAPS %ZMM14,0x280(%RSP) | 2 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 1 | 1 | 5 | 2 | vect (100.0%) |
VMOVAPS %ZMM1,0x3c0(%RSP) | 2 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 1 | 1 | 5 | 2 | vect (100.0%) |
VMOVAPS %ZMM0,0x380(%RSP) | 2 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 1 | 1 | 5 | 2 | vect (100.0%) |
VMOVAPS %ZMM3,0x4c0(%RSP) | 2 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 1 | 1 | 5 | 2 | vect (100.0%) |
VMOVAPS %ZMM2,0x7c0(%RSP) | 2 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 1 | 1 | 5 | 2 | vect (100.0%) |
VMOVAPS %ZMM4,0x780(%RSP) | 2 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 1 | 1 | 5 | 2 | vect (100.0%) |
JE 52d6d0 <_ZN3gmx15nbnxmKernelSimdIL12KernelLayout1ELNS_17KernelCoulombTypeE1EL14VdwCutoffCheck0EL17LJCombinationRule1EL20InteractionModifiers1EL7LJEwald0EL12EnergyOutput1EEEvPK16NbnxnPairlistCpuPK16nbnxn_atomdata_tPK19interaction_const_tPA3_KfP23nbnxn_atomdata_output_t+0x560> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
Function | void gmx::nbnxmKernelSimd<(KernelLayout)1, (gmx::KernelCoulombType)1, (VdwCutoffCheck)0, (LJCombinationRule)1, (InteractionModifiers)1, (LJEwald)0, (EnergyOutput)1>(NbnxnPairlistCpu const*, nbnxn_atomdata_t const*, interaction_const_t const*, float const (*) [3], nbnxn_atomdata_output_t*) |
Source file and lines | impl_x86_avx_512_simd_float.h:70-461 |
Module | libgromacs_mpi.so.9.0.0 |
nb instructions | 272 |
nb uops | 284 |
loop length | 1797 |
used x86 registers | 13 |
used mmx registers | 0 |
used xmm registers | 1 |
used ymm registers | 5 |
used zmm registers | 32 |
nb stack references | 31 |
ADD-SUB / MUL ratio | 0.61 |
micro-operation queue | 47.33 cycles |
front end | 47.33 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 4.50 | 4.50 | 4.25 | 4.25 | 2.50 | 29.33 | 29.33 | 29.33 | 61.50 | 61.50 | 36.00 | 36.00 | 14.50 | 14.50 |
cycles | 4.50 | 4.50 | 4.25 | 4.25 | 2.50 | 40.00 | 40.00 | 40.00 | 117.50 | 111.50 | 39.50 | 61.50 | 14.50 | 14.50 |
Cycles executing div or sqrt instructions | NA |
Longest recurrence chain latency (RecMII) | 0.00 |
Front-end | 47.33 |
Dispatch | 117.50 |
Data deps. | 0.00 |
Overall L1 | 117.50 |
all | 44% |
load | 50% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 44% |
all | 89% |
load | 60% |
store | 100% |
mul | 100% |
add-sub | 100% |
fma | 100% |
div/sqrt | 100% |
other | 52% |
all | 87% |
load | 59% |
store | 100% |
mul | 100% |
add-sub | 100% |
fma | 100% |
div/sqrt | 100% |
other | 51% |
all | 49% |
load | 53% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 49% |
all | 85% |
load | 55% |
store | 90% |
mul | 100% |
add-sub | 91% |
fma | 100% |
div/sqrt | 100% |
other | 46% |
all | 83% |
load | 55% |
store | 90% |
mul | 100% |
add-sub | 91% |
fma | 100% |
div/sqrt | 100% |
other | 46% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput | Vectorization |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
VMOVAPS 0x1c0(%RSP),%ZMM26 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VPTESTNMD %ZMM6,%ZMM6,%K7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 1 | vect (100.0%) |
VBROADCASTSS -0x1d13ba(%RIP),%ZMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 5 | 1 | scal (6.3%) |
VPTESTMD %ZMM5,%ZMM5,%K6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 1 | vect (100.0%) |
VMAXPS %ZMM6,%ZMM7,%ZMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 2 | 1 | vect (100.0%) |
VMAXPS %ZMM6,%ZMM8,%ZMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 2 | 1 | vect (100.0%) |
VMOVAPS %ZMM22,%ZMM5{%K4}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VMOVAPS 0x540(%RSP),%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM5,%ZMM0,%ZMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMOVAPS %ZMM21,%ZMM5{%K5}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VMULPS %ZMM5,%ZMM0,%ZMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM13,%ZMM13,%ZMM24 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VBROADCASTSS -0x1d0f48(%RIP),%ZMM28 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 5 | 1 | scal (6.3%) |
VBROADCASTSS -0x1d10b2(%RIP),%ZMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 5 | 1 | scal (6.3%) |
VMOVAPS %ZMM14,%ZMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VFMADD213PS %ZMM28,%ZMM24,%ZMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VBROADCASTSS -0x1d131c(%RIP),%ZMM31 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 5 | 1 | scal (6.3%) |
VBROADCASTSS -0x1d1456(%RIP),%ZMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 5 | 1 | scal (6.3%) |
VMOVAPS %ZMM5,%ZMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VFMADD213PS %ZMM31,%ZMM24,%ZMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VBROADCASTSS -0x1d15e8(%RIP),%ZMM27 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 5 | 1 | scal (6.3%) |
VBROADCASTSS -0x1d14be(%RIP),%ZMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 5 | 1 | scal (6.3%) |
VBROADCASTSS -0x1d11e0(%RIP),%ZMM20 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 5 | 1 | scal (6.3%) |
VFMADD213PS %ZMM27,%ZMM24,%ZMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VMOVAPS %ZMM20,%ZMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VFMADD213PS %ZMM3,%ZMM24,%ZMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VBROADCASTSS -0x1d11b8(%RIP),%ZMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 5 | 1 | scal (6.3%) |
VBROADCASTSS -0x1d1046(%RIP),%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 5 | 1 | scal (6.3%) |
VMOVAPS %ZMM7,%ZMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VFMADD213PS %ZMM16,%ZMM24,%ZMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VBROADCASTSS -0x1d1578(%RIP),%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 5 | 1 | scal (6.3%) |
VFMADD213PS %ZMM0,%ZMM24,%ZMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VBROADCASTSS -0x1d15bc(%RIP),%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 5 | 1 | scal (6.3%) |
VFMADD213PS %ZMM1,%ZMM24,%ZMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VBROADCASTSS -0x1d12fc(%RIP),%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 5 | 1 | scal (6.3%) |
VFMADD213PS %ZMM2,%ZMM24,%ZMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VMULPS %ZMM12,%ZMM12,%ZMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VFMADD213PS %ZMM28,%ZMM9,%ZMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VFMADD213PS %ZMM31,%ZMM9,%ZMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VFMADD213PS %ZMM3,%ZMM9,%ZMM20 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VFMADD213PS %ZMM16,%ZMM9,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VFMADD213PS %ZMM0,%ZMM9,%ZMM20 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VFMADD213PS %ZMM1,%ZMM9,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VFMADD213PS %ZMM2,%ZMM9,%ZMM20 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VBROADCASTSS -0x1d1404(%RIP),%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 5 | 1 | scal (6.3%) |
VBROADCASTSS -0x1d1092(%RIP),%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 5 | 1 | scal (6.3%) |
VMOVAPS %ZMM1,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VBROADCASTSS -0x1d12fa(%RIP),%ZMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 5 | 1 | scal (6.3%) |
VFMADD213PS %ZMM0,%ZMM24,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VMOVAPS %ZMM3,%ZMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VFMADD213PS %ZMM27,%ZMM24,%ZMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VBROADCASTSS -0x1d1066(%RIP),%ZMM28 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 5 | 1 | scal (6.3%) |
VBROADCASTSS -0x1d1428(%RIP),%ZMM31 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 5 | 1 | scal (6.3%) |
VMOVAPS %ZMM31,%ZMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VFMADD213PS %ZMM28,%ZMM24,%ZMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VBROADCASTSS -0x1d1692(%RIP),%ZMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 5 | 1 | scal (6.3%) |
VBROADCASTSS -0x1d133c(%RIP),%ZMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 5 | 1 | scal (6.3%) |
VMOVAPS %ZMM11,%ZMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VFMADD213PS %ZMM10,%ZMM24,%ZMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VBROADCASTSS -0x1d14d2(%RIP),%ZMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 5 | 1 | scal (6.3%) |
VFMADD213PS %ZMM17,%ZMM24,%ZMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VBROADCASTSS -0x1d1176(%RIP),%ZMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 5 | 1 | scal (6.3%) |
VFMADD213PS %ZMM19,%ZMM24,%ZMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VBROADCASTSS -0x1d1082(%RIP),%ZMM25 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 5 | 1 | scal (6.3%) |
VFMADD213PS %ZMM25,%ZMM24,%ZMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VFMADD213PS %ZMM0,%ZMM9,%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VFMADD213PS %ZMM27,%ZMM9,%ZMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VFMADD213PS %ZMM27,%ZMM9,%ZMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VFMADD213PS %ZMM28,%ZMM9,%ZMM31 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VFMADD213PS %ZMM10,%ZMM9,%ZMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VFMADD213PS %ZMM17,%ZMM9,%ZMM31 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VRSQRT14PS %ZMM22,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 1 | vect (100.0%) |
VFMADD213PS %ZMM19,%ZMM9,%ZMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VBROADCASTSS -0x1d147e(%RIP),%ZMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 5 | 1 | scal (6.3%) |
VFMADD213PS %ZMM25,%ZMM9,%ZMM31 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VBROADCASTSS -0x1d17b6(%RIP),%ZMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 5 | 1 | scal (6.3%) |
VMULPS %ZMM0,%ZMM22,%ZMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VFMADD213PS %ZMM9,%ZMM0,%ZMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VRSQRT14PS %ZMM21,%ZMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 1 | vect (100.0%) |
VMULPS %ZMM19,%ZMM21,%ZMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VFMADD213PS %ZMM9,%ZMM19,%ZMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VMULPS %ZMM10,%ZMM0,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM10,%ZMM19,%ZMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM17,%ZMM0,%ZMM0{%K4}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM21,%ZMM9,%ZMM9{%K5}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VFMADD231PS %ZMM6,%ZMM13,%ZMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VFMADD231PS %ZMM8,%ZMM13,%ZMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VFMADD231PS %ZMM5,%ZMM12,%ZMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VRCP14PS %ZMM18,%ZMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 1 | vect (100.0%) |
VFMADD231PS %ZMM7,%ZMM12,%ZMM20 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VBROADCASTSS -0x1d1428(%RIP),%ZMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 5 | 1 | scal (6.3%) |
VFMADD231PS %ZMM2,%ZMM13,%ZMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VRCP14PS %ZMM16,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 1 | vect (100.0%) |
VFNMADD213PS %ZMM6,%ZMM2,%ZMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VMULPS %ZMM16,%ZMM2,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VFMADD231PS %ZMM15,%ZMM13,%ZMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VRCP14PS %ZMM14,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 1 | vect (100.0%) |
VMULPS %ZMM2,%ZMM4,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VFNMADD213PS %ZMM6,%ZMM5,%ZMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VFMADD231PS %ZMM1,%ZMM12,%ZMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VRCP14PS %ZMM3,%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 1 | vect (100.0%) |
VFNMADD213PS %ZMM6,%ZMM7,%ZMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VFNMADD213PS %ZMM6,%ZMM1,%ZMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VMULPS %ZMM3,%ZMM1,%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VFMADD231PS %ZMM11,%ZMM12,%ZMM31 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VMOVAPS 0x580(%RSP),%ZMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM2,%ZMM8,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM1,%ZMM31,%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM1,%ZMM8,%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMOVAPS %ZMM29,%ZMM3{%K6}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VADDPS %ZMM2,%ZMM3,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VXORPS %XMM6,%XMM6,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
VBLENDMPS %ZMM6,%ZMM29,%ZMM3{%K7} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (100.0%) |
VADDPS %ZMM1,%ZMM3,%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM18,%ZMM5,%ZMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM3,%ZMM23,%ZMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMOVAPS %ZMM0,%ZMM4{%K6}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VMULPS %ZMM3,%ZMM8,%ZMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VFMADD213PS %ZMM4,%ZMM13,%ZMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VSUBPS %ZMM2,%ZMM4,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VBROADCASTF64X4 (%R8,%R13,4),%ZMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 4 | 1 | vect (50.0%) |
VMULPS %ZMM14,%ZMM7,%ZMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VBLENDMPS %ZMM6,%ZMM9,%ZMM6{%K7} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (100.0%) |
VSUBPS %ZMM1,%ZMM6,%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
MOV %R12,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
SAL $0x6,%R12 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
VMULPS %ZMM5,%ZMM20,%ZMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VBROADCASTF64X4 (%R14,%R12,1),%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 4 | 1 | vect (50.0%) |
VADDPS 0x440(%RSP),%ZMM7,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM5,%ZMM8,%ZMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VFMADD213PS %ZMM6,%ZMM12,%ZMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VMULPS %ZMM7,%ZMM0,%ZMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM6,%ZMM6,%ZMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM6,%ZMM6,%ZMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM8,%ZMM6,%ZMM6{%K6}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM7,%ZMM7,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM7,%ZMM7,%ZMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM8,%ZMM7,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS 0x5c0(%RSP),%ZMM4,%ZMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM2,%ZMM8,%ZMM2{%K4}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VADDPS 0x80(%RSP),%ZMM2,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMOVAPS 0x740(%RSP),%ZMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM4,%ZMM22,%ZMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM1,%ZMM4,%ZMM1{%K5}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VADDPS %ZMM1,%ZMM2,%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMOVAPS %ZMM1,0x80(%RSP) | 2 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 1 | 1 | 5 | 2 | vect (100.0%) |
SAL $0x4,%RAX | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
OR $0x8,%EAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
CLTQ | scal (12.5%) | |||||||||||||||||
VMULPS %ZMM3,%ZMM8,%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VBROADCASTF64X4 (%R14,%RAX,4),%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 4 | 1 | vect (50.0%) |
VMULPS 0x400(%RSP),%ZMM2,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM5,%ZMM4,%ZMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM6,%ZMM2,%ZMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM7,%ZMM2,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM9,%ZMM9,%ZMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM3,%ZMM5,%ZMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMOVAPS 0x180(%RSP),%ZMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMOVAPS %ZMM18,%ZMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VFMADD213PS %ZMM4,%ZMM2,%ZMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VMULPS %ZMM4,%ZMM6,%ZMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VSUBPS %ZMM4,%ZMM6,%ZMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VADDPS %ZMM4,%ZMM1,%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM0,%ZMM0,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM1,%ZMM0,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM2,%ZMM7,%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VFMADD213PS %ZMM6,%ZMM26,%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VMULPS -0x1d134d(%RIP){1to16},%ZMM5,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
KANDW %K6,%K4,%K4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | N/A |
VFMADD132PS -0x1d1317(%RIP){1to16},%ZMM2,%ZMM1{%K4}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VMOVAPS 0x100(%RSP),%ZMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VADDPS %ZMM1,%ZMM21,%ZMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS 0x3c0(%RSP),%ZMM0,%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS 0x4c0(%RSP),%ZMM0,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS 0x780(%RSP),%ZMM0,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM3,%ZMM30,%ZMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS 0x380(%RSP),%ZMM3,%ZMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS 0x7c0(%RSP),%ZMM3,%ZMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMOVAPS 0x280(%RSP),%ZMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VADDPS %ZMM1,%ZMM14,%ZMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMOVAPS 0x200(%RSP),%ZMM27 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VADDPS %ZMM4,%ZMM27,%ZMM27 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VADDPS %ZMM1,%ZMM4,%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMOVAPS 0xc0(%RSP),%ZMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VADDPS %ZMM2,%ZMM10,%ZMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMOVAPS 0x2c0(%RSP),%ZMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VADDPS %ZMM5,%ZMM12,%ZMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMOVAPS 0x300(%RSP),%ZMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VADDPS %ZMM0,%ZMM17,%ZMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMOVAPS 0x240(%RSP),%ZMM24 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VADDPS %ZMM3,%ZMM24,%ZMM24 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VADDPS %ZMM2,%ZMM5,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VADDPS %ZMM0,%ZMM3,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VEXTRACTF64X4 $0x1,%ZMM1,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (50.0%) |
VADDPS %YMM3,%YMM1,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
VMOVAPS (%R10,%R9,4),%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
VSUBPS %YMM1,%YMM3,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
VMOVAPS 0x20(%R10,%R9,4),%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
VMOVAPS 0x40(%R10,%R9,4),%YMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
VMOVAPS %YMM1,(%R10,%R9,4) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 | vect (50.0%) |
VEXTRACTF64X4 $0x1,%ZMM2,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (50.0%) |
VADDPS %YMM1,%YMM2,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
VSUBPS %YMM1,%YMM3,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
VMOVAPS %YMM1,0x20(%R10,%R9,4) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 | vect (50.0%) |
VEXTRACTF64X4 $0x1,%ZMM0,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (50.0%) |
VADDPS %YMM1,%YMM0,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
VSUBPS %YMM0,%YMM4,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
VMOVAPS %YMM0,0x40(%R10,%R9,4) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 | vect (50.0%) |
INC %RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV 0x140(%RSP),%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
CMP %RCX,%R12 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
VMOVAPS 0x500(%RSP),%ZMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMOVAPS 0x700(%RSP),%ZMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMOVAPS 0x6c0(%RSP),%ZMM25 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMOVAPS 0x680(%RSP),%ZMM26 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMOVAPS 0x640(%RSP),%ZMM28 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMOVAPS 0x600(%RSP),%ZMM31 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMOVAPS 0x480(%RSP),%ZMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
JE 52fb60 <_ZN3gmx15nbnxmKernelSimdIL12KernelLayout1ELNS_17KernelCoulombTypeE1EL14VdwCutoffCheck0EL17LJCombinationRule1EL20InteractionModifiers1EL7LJEwald0EL12EnergyOutput1EEEvPK16NbnxnPairlistCpuPK16nbnxn_atomdata_tPK19interaction_const_tPA3_KfP23nbnxn_atomdata_output_t+0x29f0> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV 0x80(%RDI),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
CMPL $-0x1,0x4(%RAX,%RCX,8) | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (6.3%) |
JE 52ea4f <_ZN3gmx15nbnxmKernelSimdIL12KernelLayout1ELNS_17KernelCoulombTypeE1EL14VdwCutoffCheck0EL17LJCombinationRule1EL20InteractionModifiers1EL7LJEwald0EL12EnergyOutput1EEEvPK16NbnxnPairlistCpuPK16nbnxn_atomdata_tPK19interaction_const_tPA3_KfP23nbnxn_atomdata_output_t+0x18df> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOVSXD (%R15,%RCX,8),%R12 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
LEA (,%R12,8),%R13 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VPBROADCASTD 0x4(%R15,%RCX,8),%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | 0.50 | scal (6.3%) |
LEA (,%R13,2),%R9 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
ADD %R13,%R9 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VPANDD 0x840(%RSP),%ZMM0,%ZMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 1 | vect (100.0%) |
VPANDD 0x800(%RSP),%ZMM0,%ZMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 1 | vect (100.0%) |
VBROADCASTF64X4 (%RSI,%R9,4),%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 4 | 1 | vect (50.0%) |
MOVSXD %R9D,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VBROADCASTF64X4 0x20(%RSI,%RAX,4),%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 4 | 1 | vect (50.0%) |
VBROADCASTF64X4 0x40(%RSI,%RAX,4),%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 4 | 1 | vect (50.0%) |
VSUBPS %ZMM0,%ZMM23,%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VSUBPS %ZMM0,%ZMM25,%ZMM30 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VSUBPS %ZMM2,%ZMM26,%ZMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VSUBPS %ZMM2,%ZMM28,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VSUBPS %ZMM7,%ZMM31,%ZMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VSUBPS %ZMM7,%ZMM11,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM1,%ZMM1,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM3,%ZMM3,%ZMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VADDPS %ZMM8,%ZMM7,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM4,%ZMM4,%ZMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VADDPS %ZMM8,%ZMM7,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM30,%ZMM30,%ZMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM0,%ZMM0,%ZMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VADDPS %ZMM9,%ZMM8,%ZMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM2,%ZMM2,%ZMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VADDPS %ZMM9,%ZMM8,%ZMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VCMPPS $0x1,%ZMM19,%ZMM7,%K4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 1 | vect (100.0%) |
VCMPPS $0x1,%ZMM19,%ZMM8,%K5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 1 | vect (100.0%) |
LEA (%R12,%R12,1),%EAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
CMP %R11D,%EAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (6.3%) |
VMOVAPS %ZMM21,0x100(%RSP) | 2 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 1 | 1 | 5 | 2 | vect (100.0%) |
VMOVAPS %ZMM17,0x300(%RSP) | 2 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 1 | 1 | 5 | 2 | vect (100.0%) |
VMOVAPS %ZMM24,0x240(%RSP) | 2 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 1 | 1 | 5 | 2 | vect (100.0%) |
VMOVAPS %ZMM10,0xc0(%RSP) | 2 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 1 | 1 | 5 | 2 | vect (100.0%) |
VMOVAPS %ZMM12,0x2c0(%RSP) | 2 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 1 | 1 | 5 | 2 | vect (100.0%) |
VMOVAPS %ZMM27,0x200(%RSP) | 2 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 1 | 1 | 5 | 2 | vect (100.0%) |
VMOVAPS %ZMM14,0x280(%RSP) | 2 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 1 | 1 | 5 | 2 | vect (100.0%) |
VMOVAPS %ZMM1,0x3c0(%RSP) | 2 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 1 | 1 | 5 | 2 | vect (100.0%) |
VMOVAPS %ZMM0,0x380(%RSP) | 2 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 1 | 1 | 5 | 2 | vect (100.0%) |
VMOVAPS %ZMM3,0x4c0(%RSP) | 2 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 1 | 1 | 5 | 2 | vect (100.0%) |
VMOVAPS %ZMM2,0x7c0(%RSP) | 2 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 1 | 1 | 5 | 2 | vect (100.0%) |
VMOVAPS %ZMM4,0x780(%RSP) | 2 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 1 | 1 | 5 | 2 | vect (100.0%) |
JE 52d6d0 <_ZN3gmx15nbnxmKernelSimdIL12KernelLayout1ELNS_17KernelCoulombTypeE1EL14VdwCutoffCheck0EL17LJCombinationRule1EL20InteractionModifiers1EL7LJEwald0EL12EnergyOutput1EEEvPK16NbnxnPairlistCpuPK16nbnxn_atomdata_tPK19interaction_const_tPA3_KfP23nbnxn_atomdata_output_t+0x560> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
OR $0x1,%EAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
CMP %R11D,%EAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
JNE 52d6da <_ZN3gmx15nbnxmKernelSimdIL12KernelLayout1ELNS_17KernelCoulombTypeE1EL14VdwCutoffCheck0EL17LJCombinationRule1EL20InteractionModifiers1EL7LJEwald0EL12EnergyOutput1EEEvPK16NbnxnPairlistCpuPK16nbnxn_atomdata_tPK19interaction_const_tPA3_KfP23nbnxn_atomdata_output_t+0x56a> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
KANDD %K2,%K4,%K4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | N/A |
KANDD %K3,%K5,%K5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | N/A |
JMP 52d6da <_ZN3gmx15nbnxmKernelSimdIL12KernelLayout1ELNS_17KernelCoulombTypeE1EL14VdwCutoffCheck0EL17LJCombinationRule1EL20InteractionModifiers1EL7LJEwald0EL12EnergyOutput1EEEvPK16NbnxnPairlistCpuPK16nbnxn_atomdata_tPK19interaction_const_tPA3_KfP23nbnxn_atomdata_output_t+0x56a> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
Function | void gmx::nbnxmKernelSimd<(KernelLayout)1, (gmx::KernelCoulombType)1, (VdwCutoffCheck)0, (LJCombinationRule)1, (InteractionModifiers)1, (LJEwald)0, (EnergyOutput)1>(NbnxnPairlistCpu const*, nbnxn_atomdata_t const*, interaction_const_t const*, float const (*) [3], nbnxn_atomdata_output_t*) |
Source file and lines | impl_x86_avx_512_simd_float.h:70-461 |
Module | libgromacs_mpi.so.9.0.0 |
nb instructions | 269 |
nb uops | 280 |
loop length | 1782 |
used x86 registers | 13 |
used mmx registers | 0 |
used xmm registers | 1 |
used ymm registers | 5 |
used zmm registers | 32 |
nb stack references | 31 |
ADD-SUB / MUL ratio | 0.61 |
micro-operation queue | 46.67 cycles |
front end | 46.67 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 4.00 | 4.00 | 4.00 | 4.00 | 2.00 | 29.33 | 29.33 | 29.33 | 61.50 | 61.50 | 35.00 | 35.00 | 14.50 | 14.50 |
cycles | 4.00 | 4.00 | 4.00 | 4.00 | 2.00 | 40.00 | 40.00 | 40.00 | 117.50 | 111.50 | 38.50 | 60.50 | 14.50 | 14.50 |
Cycles executing div or sqrt instructions | NA |
Longest recurrence chain latency (RecMII) | 0.00 |
Front-end | 46.67 |
Dispatch | 117.50 |
Data deps. | 0.00 |
Overall L1 | 117.50 |
all | 44% |
load | 50% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 44% |
all | 89% |
load | 60% |
store | 100% |
mul | 100% |
add-sub | 100% |
fma | 100% |
div/sqrt | 100% |
other | 52% |
all | 87% |
load | 59% |
store | 100% |
mul | 100% |
add-sub | 100% |
fma | 100% |
div/sqrt | 100% |
other | 51% |
all | 49% |
load | 53% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 49% |
all | 85% |
load | 55% |
store | 90% |
mul | 100% |
add-sub | 91% |
fma | 100% |
div/sqrt | 100% |
other | 46% |
all | 83% |
load | 55% |
store | 90% |
mul | 100% |
add-sub | 91% |
fma | 100% |
div/sqrt | 100% |
other | 46% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput | Vectorization |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
VMOVAPS 0x1c0(%RSP),%ZMM26 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VPTESTNMD %ZMM6,%ZMM6,%K7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 1 | vect (100.0%) |
VBROADCASTSS -0x1d13ba(%RIP),%ZMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 5 | 1 | scal (6.3%) |
VPTESTMD %ZMM5,%ZMM5,%K6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 1 | vect (100.0%) |
VMAXPS %ZMM6,%ZMM7,%ZMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 2 | 1 | vect (100.0%) |
VMAXPS %ZMM6,%ZMM8,%ZMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 2 | 1 | vect (100.0%) |
VMOVAPS %ZMM22,%ZMM5{%K4}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VMOVAPS 0x540(%RSP),%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM5,%ZMM0,%ZMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMOVAPS %ZMM21,%ZMM5{%K5}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VMULPS %ZMM5,%ZMM0,%ZMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM13,%ZMM13,%ZMM24 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VBROADCASTSS -0x1d0f48(%RIP),%ZMM28 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 5 | 1 | scal (6.3%) |
VBROADCASTSS -0x1d10b2(%RIP),%ZMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 5 | 1 | scal (6.3%) |
VMOVAPS %ZMM14,%ZMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VFMADD213PS %ZMM28,%ZMM24,%ZMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VBROADCASTSS -0x1d131c(%RIP),%ZMM31 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 5 | 1 | scal (6.3%) |
VBROADCASTSS -0x1d1456(%RIP),%ZMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 5 | 1 | scal (6.3%) |
VMOVAPS %ZMM5,%ZMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VFMADD213PS %ZMM31,%ZMM24,%ZMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VBROADCASTSS -0x1d15e8(%RIP),%ZMM27 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 5 | 1 | scal (6.3%) |
VBROADCASTSS -0x1d14be(%RIP),%ZMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 5 | 1 | scal (6.3%) |
VBROADCASTSS -0x1d11e0(%RIP),%ZMM20 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 5 | 1 | scal (6.3%) |
VFMADD213PS %ZMM27,%ZMM24,%ZMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VMOVAPS %ZMM20,%ZMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VFMADD213PS %ZMM3,%ZMM24,%ZMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VBROADCASTSS -0x1d11b8(%RIP),%ZMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 5 | 1 | scal (6.3%) |
VBROADCASTSS -0x1d1046(%RIP),%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 5 | 1 | scal (6.3%) |
VMOVAPS %ZMM7,%ZMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VFMADD213PS %ZMM16,%ZMM24,%ZMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VBROADCASTSS -0x1d1578(%RIP),%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 5 | 1 | scal (6.3%) |
VFMADD213PS %ZMM0,%ZMM24,%ZMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VBROADCASTSS -0x1d15bc(%RIP),%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 5 | 1 | scal (6.3%) |
VFMADD213PS %ZMM1,%ZMM24,%ZMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VBROADCASTSS -0x1d12fc(%RIP),%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 5 | 1 | scal (6.3%) |
VFMADD213PS %ZMM2,%ZMM24,%ZMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VMULPS %ZMM12,%ZMM12,%ZMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VFMADD213PS %ZMM28,%ZMM9,%ZMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VFMADD213PS %ZMM31,%ZMM9,%ZMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VFMADD213PS %ZMM3,%ZMM9,%ZMM20 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VFMADD213PS %ZMM16,%ZMM9,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VFMADD213PS %ZMM0,%ZMM9,%ZMM20 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VFMADD213PS %ZMM1,%ZMM9,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VFMADD213PS %ZMM2,%ZMM9,%ZMM20 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VBROADCASTSS -0x1d1404(%RIP),%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 5 | 1 | scal (6.3%) |
VBROADCASTSS -0x1d1092(%RIP),%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 5 | 1 | scal (6.3%) |
VMOVAPS %ZMM1,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VBROADCASTSS -0x1d12fa(%RIP),%ZMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 5 | 1 | scal (6.3%) |
VFMADD213PS %ZMM0,%ZMM24,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VMOVAPS %ZMM3,%ZMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VFMADD213PS %ZMM27,%ZMM24,%ZMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VBROADCASTSS -0x1d1066(%RIP),%ZMM28 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 5 | 1 | scal (6.3%) |
VBROADCASTSS -0x1d1428(%RIP),%ZMM31 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 5 | 1 | scal (6.3%) |
VMOVAPS %ZMM31,%ZMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VFMADD213PS %ZMM28,%ZMM24,%ZMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VBROADCASTSS -0x1d1692(%RIP),%ZMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 5 | 1 | scal (6.3%) |
VBROADCASTSS -0x1d133c(%RIP),%ZMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 5 | 1 | scal (6.3%) |
VMOVAPS %ZMM11,%ZMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VFMADD213PS %ZMM10,%ZMM24,%ZMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VBROADCASTSS -0x1d14d2(%RIP),%ZMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 5 | 1 | scal (6.3%) |
VFMADD213PS %ZMM17,%ZMM24,%ZMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VBROADCASTSS -0x1d1176(%RIP),%ZMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 5 | 1 | scal (6.3%) |
VFMADD213PS %ZMM19,%ZMM24,%ZMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VBROADCASTSS -0x1d1082(%RIP),%ZMM25 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 5 | 1 | scal (6.3%) |
VFMADD213PS %ZMM25,%ZMM24,%ZMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VFMADD213PS %ZMM0,%ZMM9,%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VFMADD213PS %ZMM27,%ZMM9,%ZMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VFMADD213PS %ZMM27,%ZMM9,%ZMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VFMADD213PS %ZMM28,%ZMM9,%ZMM31 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VFMADD213PS %ZMM10,%ZMM9,%ZMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VFMADD213PS %ZMM17,%ZMM9,%ZMM31 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VRSQRT14PS %ZMM22,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 1 | vect (100.0%) |
VFMADD213PS %ZMM19,%ZMM9,%ZMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VBROADCASTSS -0x1d147e(%RIP),%ZMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 5 | 1 | scal (6.3%) |
VFMADD213PS %ZMM25,%ZMM9,%ZMM31 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VBROADCASTSS -0x1d17b6(%RIP),%ZMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 5 | 1 | scal (6.3%) |
VMULPS %ZMM0,%ZMM22,%ZMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VFMADD213PS %ZMM9,%ZMM0,%ZMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VRSQRT14PS %ZMM21,%ZMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 1 | vect (100.0%) |
VMULPS %ZMM19,%ZMM21,%ZMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VFMADD213PS %ZMM9,%ZMM19,%ZMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VMULPS %ZMM10,%ZMM0,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM10,%ZMM19,%ZMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM17,%ZMM0,%ZMM0{%K4}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM21,%ZMM9,%ZMM9{%K5}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VFMADD231PS %ZMM6,%ZMM13,%ZMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VFMADD231PS %ZMM8,%ZMM13,%ZMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VFMADD231PS %ZMM5,%ZMM12,%ZMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VRCP14PS %ZMM18,%ZMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 1 | vect (100.0%) |
VFMADD231PS %ZMM7,%ZMM12,%ZMM20 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VBROADCASTSS -0x1d1428(%RIP),%ZMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 5 | 1 | scal (6.3%) |
VFMADD231PS %ZMM2,%ZMM13,%ZMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VRCP14PS %ZMM16,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 1 | vect (100.0%) |
VFNMADD213PS %ZMM6,%ZMM2,%ZMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VMULPS %ZMM16,%ZMM2,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VFMADD231PS %ZMM15,%ZMM13,%ZMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VRCP14PS %ZMM14,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 1 | vect (100.0%) |
VMULPS %ZMM2,%ZMM4,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VFNMADD213PS %ZMM6,%ZMM5,%ZMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VFMADD231PS %ZMM1,%ZMM12,%ZMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VRCP14PS %ZMM3,%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 1 | vect (100.0%) |
VFNMADD213PS %ZMM6,%ZMM7,%ZMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VFNMADD213PS %ZMM6,%ZMM1,%ZMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VMULPS %ZMM3,%ZMM1,%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VFMADD231PS %ZMM11,%ZMM12,%ZMM31 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VMOVAPS 0x580(%RSP),%ZMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM2,%ZMM8,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM1,%ZMM31,%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM1,%ZMM8,%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMOVAPS %ZMM29,%ZMM3{%K6}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VADDPS %ZMM2,%ZMM3,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VXORPS %XMM6,%XMM6,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
VBLENDMPS %ZMM6,%ZMM29,%ZMM3{%K7} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (100.0%) |
VADDPS %ZMM1,%ZMM3,%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM18,%ZMM5,%ZMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM3,%ZMM23,%ZMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMOVAPS %ZMM0,%ZMM4{%K6}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VMULPS %ZMM3,%ZMM8,%ZMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VFMADD213PS %ZMM4,%ZMM13,%ZMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VSUBPS %ZMM2,%ZMM4,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VBROADCASTF64X4 (%R8,%R13,4),%ZMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 4 | 1 | vect (50.0%) |
VMULPS %ZMM14,%ZMM7,%ZMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VBLENDMPS %ZMM6,%ZMM9,%ZMM6{%K7} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (100.0%) |
VSUBPS %ZMM1,%ZMM6,%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
MOV %R12,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
SAL $0x6,%R12 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
VMULPS %ZMM5,%ZMM20,%ZMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VBROADCASTF64X4 (%R14,%R12,1),%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 4 | 1 | vect (50.0%) |
VADDPS 0x440(%RSP),%ZMM7,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM5,%ZMM8,%ZMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VFMADD213PS %ZMM6,%ZMM12,%ZMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VMULPS %ZMM7,%ZMM0,%ZMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM6,%ZMM6,%ZMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM6,%ZMM6,%ZMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM8,%ZMM6,%ZMM6{%K6}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM7,%ZMM7,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM7,%ZMM7,%ZMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM8,%ZMM7,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS 0x5c0(%RSP),%ZMM4,%ZMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM2,%ZMM8,%ZMM2{%K4}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VADDPS 0x80(%RSP),%ZMM2,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMOVAPS 0x740(%RSP),%ZMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM4,%ZMM22,%ZMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM1,%ZMM4,%ZMM1{%K5}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VADDPS %ZMM1,%ZMM2,%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMOVAPS %ZMM1,0x80(%RSP) | 2 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 1 | 1 | 5 | 2 | vect (100.0%) |
SAL $0x4,%RAX | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
OR $0x8,%EAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
CLTQ | scal (12.5%) | |||||||||||||||||
VMULPS %ZMM3,%ZMM8,%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VBROADCASTF64X4 (%R14,%RAX,4),%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 4 | 1 | vect (50.0%) |
VMULPS 0x400(%RSP),%ZMM2,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM5,%ZMM4,%ZMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM6,%ZMM2,%ZMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM7,%ZMM2,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM9,%ZMM9,%ZMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM3,%ZMM5,%ZMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMOVAPS 0x180(%RSP),%ZMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMOVAPS %ZMM18,%ZMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VFMADD213PS %ZMM4,%ZMM2,%ZMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VMULPS %ZMM4,%ZMM6,%ZMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VSUBPS %ZMM4,%ZMM6,%ZMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VADDPS %ZMM4,%ZMM1,%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM0,%ZMM0,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM1,%ZMM0,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM2,%ZMM7,%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VFMADD213PS %ZMM6,%ZMM26,%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VMULPS -0x1d134d(%RIP){1to16},%ZMM5,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
KANDW %K6,%K4,%K4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | N/A |
VFMADD132PS -0x1d1317(%RIP){1to16},%ZMM2,%ZMM1{%K4}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 1 | vect (100.0%) |
VMOVAPS 0x100(%RSP),%ZMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VADDPS %ZMM1,%ZMM21,%ZMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS 0x3c0(%RSP),%ZMM0,%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS 0x4c0(%RSP),%ZMM0,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS 0x780(%RSP),%ZMM0,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM3,%ZMM30,%ZMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS 0x380(%RSP),%ZMM3,%ZMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS 0x7c0(%RSP),%ZMM3,%ZMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMOVAPS 0x280(%RSP),%ZMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VADDPS %ZMM1,%ZMM14,%ZMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMOVAPS 0x200(%RSP),%ZMM27 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VADDPS %ZMM4,%ZMM27,%ZMM27 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VADDPS %ZMM1,%ZMM4,%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMOVAPS 0xc0(%RSP),%ZMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VADDPS %ZMM2,%ZMM10,%ZMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMOVAPS 0x2c0(%RSP),%ZMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VADDPS %ZMM5,%ZMM12,%ZMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMOVAPS 0x300(%RSP),%ZMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VADDPS %ZMM0,%ZMM17,%ZMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMOVAPS 0x240(%RSP),%ZMM24 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VADDPS %ZMM3,%ZMM24,%ZMM24 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VADDPS %ZMM2,%ZMM5,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VADDPS %ZMM0,%ZMM3,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VEXTRACTF64X4 $0x1,%ZMM1,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (50.0%) |
VADDPS %YMM3,%YMM1,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
VMOVAPS (%R10,%R9,4),%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
VSUBPS %YMM1,%YMM3,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
VMOVAPS 0x20(%R10,%R9,4),%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
VMOVAPS 0x40(%R10,%R9,4),%YMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
VMOVAPS %YMM1,(%R10,%R9,4) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 | vect (50.0%) |
VEXTRACTF64X4 $0x1,%ZMM2,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (50.0%) |
VADDPS %YMM1,%YMM2,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
VSUBPS %YMM1,%YMM3,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
VMOVAPS %YMM1,0x20(%R10,%R9,4) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 | vect (50.0%) |
VEXTRACTF64X4 $0x1,%ZMM0,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (50.0%) |
VADDPS %YMM1,%YMM0,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
VSUBPS %YMM0,%YMM4,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
VMOVAPS %YMM0,0x40(%R10,%R9,4) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 4 | 1 | vect (50.0%) |
INC %RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV 0x140(%RSP),%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
CMP %RCX,%R12 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
VMOVAPS 0x500(%RSP),%ZMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMOVAPS 0x700(%RSP),%ZMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMOVAPS 0x6c0(%RSP),%ZMM25 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMOVAPS 0x680(%RSP),%ZMM26 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMOVAPS 0x640(%RSP),%ZMM28 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMOVAPS 0x600(%RSP),%ZMM31 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMOVAPS 0x480(%RSP),%ZMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
JE 52fb60 <_ZN3gmx15nbnxmKernelSimdIL12KernelLayout1ELNS_17KernelCoulombTypeE1EL14VdwCutoffCheck0EL17LJCombinationRule1EL20InteractionModifiers1EL7LJEwald0EL12EnergyOutput1EEEvPK16NbnxnPairlistCpuPK16nbnxn_atomdata_tPK19interaction_const_tPA3_KfP23nbnxn_atomdata_output_t+0x29f0> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV 0x80(%RDI),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
CMPL $-0x1,0x4(%RAX,%RCX,8) | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (6.3%) |
JE 52ea4f <_ZN3gmx15nbnxmKernelSimdIL12KernelLayout1ELNS_17KernelCoulombTypeE1EL14VdwCutoffCheck0EL17LJCombinationRule1EL20InteractionModifiers1EL7LJEwald0EL12EnergyOutput1EEEvPK16NbnxnPairlistCpuPK16nbnxn_atomdata_tPK19interaction_const_tPA3_KfP23nbnxn_atomdata_output_t+0x18df> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOVSXD (%R15,%RCX,8),%R12 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
LEA (,%R12,8),%R13 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VPBROADCASTD 0x4(%R15,%RCX,8),%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | 0.50 | scal (6.3%) |
LEA (,%R13,2),%R9 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
ADD %R13,%R9 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VPANDD 0x840(%RSP),%ZMM0,%ZMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 1 | vect (100.0%) |
VPANDD 0x800(%RSP),%ZMM0,%ZMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 1 | vect (100.0%) |
VBROADCASTF64X4 (%RSI,%R9,4),%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 4 | 1 | vect (50.0%) |
MOVSXD %R9D,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VBROADCASTF64X4 0x20(%RSI,%RAX,4),%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 4 | 1 | vect (50.0%) |
VBROADCASTF64X4 0x40(%RSI,%RAX,4),%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 4 | 1 | vect (50.0%) |
VSUBPS %ZMM0,%ZMM23,%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VSUBPS %ZMM0,%ZMM25,%ZMM30 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VSUBPS %ZMM2,%ZMM26,%ZMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VSUBPS %ZMM2,%ZMM28,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VSUBPS %ZMM7,%ZMM31,%ZMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VSUBPS %ZMM7,%ZMM11,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM1,%ZMM1,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM3,%ZMM3,%ZMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VADDPS %ZMM8,%ZMM7,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM4,%ZMM4,%ZMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VADDPS %ZMM8,%ZMM7,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM30,%ZMM30,%ZMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM0,%ZMM0,%ZMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VADDPS %ZMM9,%ZMM8,%ZMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMULPS %ZMM2,%ZMM2,%ZMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VADDPS %ZMM9,%ZMM8,%ZMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VCMPPS $0x1,%ZMM19,%ZMM7,%K4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 1 | vect (100.0%) |
VCMPPS $0x1,%ZMM19,%ZMM8,%K5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 1 | vect (100.0%) |
LEA (%R12,%R12,1),%EAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
CMP %R11D,%EAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (6.3%) |
VMOVAPS %ZMM21,0x100(%RSP) | 2 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 1 | 1 | 5 | 2 | vect (100.0%) |
VMOVAPS %ZMM17,0x300(%RSP) | 2 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 1 | 1 | 5 | 2 | vect (100.0%) |
VMOVAPS %ZMM24,0x240(%RSP) | 2 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 1 | 1 | 5 | 2 | vect (100.0%) |
VMOVAPS %ZMM10,0xc0(%RSP) | 2 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 1 | 1 | 5 | 2 | vect (100.0%) |
VMOVAPS %ZMM12,0x2c0(%RSP) | 2 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 1 | 1 | 5 | 2 | vect (100.0%) |
VMOVAPS %ZMM27,0x200(%RSP) | 2 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 1 | 1 | 5 | 2 | vect (100.0%) |
VMOVAPS %ZMM14,0x280(%RSP) | 2 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 1 | 1 | 5 | 2 | vect (100.0%) |
VMOVAPS %ZMM1,0x3c0(%RSP) | 2 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 1 | 1 | 5 | 2 | vect (100.0%) |
VMOVAPS %ZMM0,0x380(%RSP) | 2 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 1 | 1 | 5 | 2 | vect (100.0%) |
VMOVAPS %ZMM3,0x4c0(%RSP) | 2 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 1 | 1 | 5 | 2 | vect (100.0%) |
VMOVAPS %ZMM2,0x7c0(%RSP) | 2 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 1 | 1 | 5 | 2 | vect (100.0%) |
VMOVAPS %ZMM4,0x780(%RSP) | 2 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 1 | 1 | 5 | 2 | vect (100.0%) |
JE 52d6d0 <_ZN3gmx15nbnxmKernelSimdIL12KernelLayout1ELNS_17KernelCoulombTypeE1EL14VdwCutoffCheck0EL17LJCombinationRule1EL20InteractionModifiers1EL7LJEwald0EL12EnergyOutput1EEEvPK16NbnxnPairlistCpuPK16nbnxn_atomdata_tPK19interaction_const_tPA3_KfP23nbnxn_atomdata_output_t+0x560> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
OR $0x1,%EAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
CMP %R11D,%EAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
JNE 52d6da <_ZN3gmx15nbnxmKernelSimdIL12KernelLayout1ELNS_17KernelCoulombTypeE1EL14VdwCutoffCheck0EL17LJCombinationRule1EL20InteractionModifiers1EL7LJEwald0EL12EnergyOutput1EEEvPK16NbnxnPairlistCpuPK16nbnxn_atomdata_tPK19interaction_const_tPA3_KfP23nbnxn_atomdata_output_t+0x56a> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
Run 1x1 | Number processes: 1; Number processes per node: 1; OMP_NUM_THREADS: 1 |
---|---|
Run 2x1 | Number processes: 2; Number processes per node: 2; OMP_NUM_THREADS: 1 |
Run 4x1 | Number processes: 4; Number processes per node: 4; OMP_NUM_THREADS: 1 |
Run 8x1 | Number processes: 8; Number processes per node: 8; OMP_NUM_THREADS: 1 |
Run 16x1 | Number processes: 16; Number processes per node: 16; OMP_NUM_THREADS: 1 |
Run 32x1 | Number processes: 32; Number processes per node: 32; OMP_NUM_THREADS: 1 |
Run 64x1 | Number processes: 64; Number processes per node: 64; OMP_NUM_THREADS: 1 |
Run 128x1 | Number processes: 128; Number processes per node: 128; OMP_NUM_THREADS: 1 |
Run 192x1 | Number processes: 192; Number nodes: 1; Number processes per node: 192; Run Command: <executable> mdrun -s ion_channel.tpr -nsteps 10000 -pin on -deffnm aocc; MPI Command: mpirun -genv I_MPI_FABRICS=shm -n <number_processes>; Dataset: ; Run Directory: .; OMP_NUM_THREADS: 1 |
(1x1) Efficiency | (1x1) Potential Speed-Up (%) | (2x1) Efficiency | (2x1) Potential Speed-Up (%) | (4x1) Efficiency | (4x1) Potential Speed-Up (%) | (8x1) Efficiency | (8x1) Potential Speed-Up (%) | (16x1) Efficiency | (16x1) Potential Speed-Up (%) | (32x1) Efficiency | (32x1) Potential Speed-Up (%) | (64x1) Efficiency | (64x1) Potential Speed-Up (%) | (128x1) Efficiency | (128x1) Potential Speed-Up (%) | (192x1) Efficiency | (192x1) Potential Speed-Up (%) |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1 | 0 | 1 | 0 | 0.91 | 0.02 | 0.84 | 0.03 | 0.84 | 0.03 | 0.82 | 0.03 | 0.74 | 0.03 | 0.88 | 0.01 | 0.88 | 0.01 |
Run | Number of threads | Efficiency (ideal is 1) | Speedup | Ideal Speedup | Time (s) | Coverage (%) |
---|---|---|---|---|---|---|
1x1 | 1 | 1 | 1 | 1 | 2.1699981689453 | 0.26031503081322 |
2x1 | 2 | 1 | 2 | 2 | 1.1499997377396 | 0.23992493748665 |
4x1 | 4 | 0.91 | 3.65 | 4 | 0.76500034332275 | 0.23898197710514 |
8x1 | 8 | 0.84 | 6.7 | 8 | 0.45499977469444 | 0.20932200551033 |
16x1 | 16 | 0.84 | 13.37 | 16 | 0.21499991416931 | 0.19622223079205 |
32x1 | 20 | 0.82 | 26.13 | 32 | 0.21000002324581 | 0.16162538528442 |
64x1 | 40 | 0.74 | 47.48 | 64 | 0.14000000059605 | 0.11499262601137 |
128x1 | 82 | 0.88 | 112.45 | 128 | 0.065000005066395 | 0.11354496330023 |
192x1 | 118 | 0.88 | 168 | 192 | 0.059999998658895 | 0.095891937613487 |