Loop Id: 879 | Module: exec | Source: MultiBsplineRef.hpp:227-262 [...] | Coverage: 0.01% |
---|
Loop Id: 879 | Module: exec | Source: MultiBsplineRef.hpp:227-262 [...] | Coverage: 0.01% |
---|
0x43c720 MOV 0x40(%RSP),%R9 |
0x43c725 IMUL %R12,%R9 |
0x43c729 ADD 0x160(%RSP),%R9 |
0x43c731 VBROADCASTSD %XMM17,%YMM20 |
0x43c737 VBROADCASTSD %XMM19,%YMM19 |
0x43c73d VBROADCASTSD %XMM16,%YMM17 |
0x43c743 VBROADCASTSD %XMM18,%YMM18 |
0x43c749 VBROADCASTSD %XMM21,%YMM16 |
0x43c74f VBROADCASTSD %XMM15,%YMM15 |
0x43c754 XOR %R12D,%R12D |
0x43c757 MOV 0x30(%RSP),%RDI |
0x43c75c SUB %R12,%RDI |
0x43c75f VPBROADCASTQ %RDI,%YMM21 |
0x43c765 VPCMPNLEUQ 0xbfd70(%RIP),%YMM21,%K1 |
0x43c770 ADD %R12,%R9 |
0x43c773 MOV 0x60(%RSP),%RCX |
0x43c778 LEA (%RCX,%R9,1),%RDI |
0x43c77c MOV 0xc0(%RSP),%RBX |
0x43c784 VMOVUPD (%RBX,%RDI,8),%YMM21{%K1}{z} |
0x43c78b MOV 0xe0(%RSP),%RCX |
0x43c793 LEA (%RCX,%R9,1),%RDI |
0x43c797 VMOVUPD (%RBX,%RDI,8),%YMM22{%K1}{z} |
0x43c79e VMOVAPD %YMM24,%YMM29 |
0x43c7a4 VMOVUPD 0x4e0(%RSP),%YMM24 |
0x43c7ac VMOVAPD %YMM21,%YMM24{%K1} |
0x43c7b2 VMOVAPD %YMM23,%YMM26 |
0x43c7b8 VMOVUPD 0x500(%RSP),%YMM23 |
0x43c7c0 VMOVAPD %YMM22,%YMM23{%K1} |
0x43c7c6 MOV 0xd0(%RSP),%RCX |
0x43c7ce LEA (%RCX,%R9,1),%RDI |
0x43c7d2 VMOVUPD (%RBX,%RDI,8),%YMM21{%K1}{z} |
0x43c7d9 VMOVAPD %YMM21,%YMM14{%K1} |
0x43c7df ADD 0x140(%RSP),%R9 |
0x43c7e7 VMOVUPD (%RBX,%R9,8),%YMM21{%K1}{z} |
0x43c7ee MOV 0x138(%RSP),%RBX |
0x43c7f6 VMOVAPD %YMM21,%YMM13{%K1} |
0x43c7fc VMULPD %YMM0,%YMM24,%YMM21 |
0x43c802 VFMADD231PD %YMM1,%YMM23,%YMM21 |
0x43c808 VFMADD231PD %YMM2,%YMM14,%YMM21 |
0x43c80e VFMADD231PD %YMM13,%YMM3,%YMM21 |
0x43c814 VMOVUPD (%R13,%R12,8),%YMM22{%K1}{z} |
0x43c81c VMOVAPD %YMM22,%YMM12{%K1} |
0x43c822 VMULPD %YMM4,%YMM24,%YMM22 |
0x43c828 VFMADD213PD %YMM12,%YMM21,%YMM20 |
0x43c82e VMOVUPD %YMM20,(%R13,%R12,8){%K1} |
0x43c836 VMULPD %YMM5,%YMM23,%YMM20 |
0x43c83c VFMADD231PD %YMM7,%YMM13,%YMM20 |
0x43c842 VFMADD231PD %YMM6,%YMM14,%YMM22 |
0x43c848 VFMADD231PD %YMM20,%YMM8,%YMM22 |
0x43c84e MOV 0x10(%RSP),%RCX |
0x43c853 LEA (%RCX,%R12,1),%R9 |
0x43c857 VMOVUPD (%R13,%R9,8),%YMM20{%K1}{z} |
0x43c85f VMOVAPD %YMM20,%YMM31{%K1} |
0x43c865 VFMADD213PD %YMM31,%YMM21,%YMM19 |
0x43c86b VMOVUPD %YMM19,(%R13,%R9,8){%K1} |
0x43c873 MOV 0x80(%RSP),%RCX |
0x43c87b LEA (%RCX,%R12,1),%RDI |
0x43c87f VMOVUPD (%R13,%RDI,8),%YMM19{%K1}{z} |
0x43c887 VMOVAPD %YMM19,%YMM30{%K1} |
0x43c88d VMOVAPD %YMM17,%YMM19 |
0x43c893 VFMADD213PD %YMM30,%YMM22,%YMM19 |
0x43c899 VMOVUPD %YMM19,(%R13,%RDI,8){%K1} |
0x43c8a1 MOV 0x70(%RSP),%RCX |
0x43c8a6 LEA (%RCX,%R12,1),%RCX |
0x43c8aa VMOVUPD (%R13,%RCX,8),%YMM19{%K1}{z} |
0x43c8b2 VMOVUPD 0x520(%RSP),%YMM20 |
0x43c8ba VMOVAPD %YMM19,%YMM20{%K1} |
0x43c8c0 VMOVUPD %YMM20,0x520(%RSP) |
0x43c8c8 VFMADD213PD %YMM20,%YMM21,%YMM18 |
0x43c8ce VMOVUPD %YMM18,(%R13,%RCX,8){%K1} |
0x43c8d6 MOV 0x78(%RSP),%RCX |
0x43c8db LEA (%RCX,%R12,1),%RCX |
0x43c8df VMOVUPD (%R13,%RCX,8),%YMM18{%K1}{z} |
0x43c8e7 VMOVUPD 0x540(%RSP),%YMM19 |
0x43c8ef VMOVAPD %YMM18,%YMM19{%K1} |
0x43c8f5 VMOVAPD %YMM16,%YMM18 |
0x43c8fb VMOVUPD %YMM19,0x540(%RSP) |
0x43c903 VFMADD213PD %YMM19,%YMM22,%YMM18 |
0x43c909 VMOVUPD %YMM18,(%R13,%RCX,8){%K1} |
0x43c911 MOV 0x68(%RSP),%RCX |
0x43c916 LEA (%RCX,%R12,1),%RCX |
0x43c91a VMOVUPD (%R13,%RCX,8),%YMM18{%K1}{z} |
0x43c922 VMOVAPD %YMM18,%YMM27{%K1} |
0x43c928 VMOVUPD %YMM24,0x4e0(%RSP) |
0x43c930 VMULPD %YMM9,%YMM24,%YMM18 |
0x43c936 VMOVAPD %YMM29,%YMM24 |
0x43c93c VMOVUPD %YMM23,0x500(%RSP) |
0x43c944 VFMADD231PD %YMM10,%YMM23,%YMM18 |
0x43c94a VMOVAPD %YMM26,%YMM23 |
0x43c950 VFMADD231PD %YMM11,%YMM14,%YMM18 |
0x43c956 VFMADD231PD %YMM8,%YMM13,%YMM18 |
0x43c95c VFMADD213PD %YMM27,%YMM15,%YMM18 |
0x43c962 VMOVUPD %YMM18,(%R13,%RCX,8){%K1} |
0x43c96a VMOVUPD (%RBX,%R12,8),%YMM18{%K1}{z} |
0x43c971 VMOVAPD %YMM18,%YMM28{%K1} |
0x43c977 VFMADD213PD %YMM28,%YMM21,%YMM17 |
0x43c97d VMOVUPD %YMM17,(%RBX,%R12,8){%K1} |
0x43c984 VMOVUPD (%RBX,%R9,8),%YMM17{%K1}{z} |
0x43c98b VMOVAPD %YMM17,%YMM24{%K1} |
0x43c991 VFMADD213PD %YMM24,%YMM21,%YMM16 |
0x43c997 VMOVUPD %YMM16,(%RBX,%R9,8){%K1} |
0x43c99e VMOVUPD (%RBX,%RDI,8),%YMM16{%K1}{z} |
0x43c9a5 VMOVAPD %YMM16,%YMM23{%K1} |
0x43c9ab VFMADD213PD %YMM23,%YMM15,%YMM22 |
0x43c9b1 VMOVUPD %YMM22,(%RBX,%RDI,8){%K1} |
0x43c9b8 MOV 0x30(%RSP),%RDI |
0x43c9bd VMOVUPD (%R14,%R12,8),%YMM16{%K1}{z} |
0x43c9c4 VMOVAPD %YMM16,%YMM25{%K1} |
0x43c9ca VFMADD213PD %YMM25,%YMM15,%YMM21 |
0x43c9d0 VMOVUPD %YMM21,(%R14,%R12,8){%K1} |
0x43c9d7 MOV 0x18(%RSP),%R12 |
0x43c9dc LEA 0x1(%R12),%RCX |
0x43c9e1 MOV 0x90(%RSP),%R9 |
0x43c9e9 ADD %R9,%R15 |
0x43c9ec ADD %R9,%RSI |
0x43c9ef ADD %R9,%RDX |
0x43c9f2 ADD %R9,%RAX |
0x43c9f5 CMP $0x3,%R12 |
0x43c9f9 MOV %RCX,0x18(%RSP) |
0x43c9fe MOV 0x98(%RSP),%RCX |
0x43ca06 MOV 0x88(%RSP),%R12 |
0x43ca0e JE 43c650 |
0x43ca14 TEST %EDI,%EDI |
0x43ca16 JLE 43c9d7 |
0x43ca18 MOV 0x18(%RSP),%R12 |
0x43ca1d VMOVSD 0x380(%RSP,%R12,8),%XMM15 |
0x43ca27 VMULSD 0x2a0(%RSP),%XMM15,%XMM17 |
0x43ca2f VMOVSD 0x280(%RSP),%XMM19 |
0x43ca37 VMULSD %XMM19,%XMM15,%XMM16 |
0x43ca3d VMOVSD 0x260(%RSP),%XMM20 |
0x43ca45 VMULSD %XMM20,%XMM15,%XMM15 |
0x43ca4b VMOVSD 0x220(%RSP,%R12,8),%XMM18 |
0x43ca53 VMULSD %XMM19,%XMM18,%XMM19 |
0x43ca59 VMULSD %XMM20,%XMM18,%XMM21 |
0x43ca5f VMULSD 0x1e0(%RSP,%R12,8),%XMM20,%XMM18 |
0x43ca67 TEST %RCX,%RCX |
0x43ca6a JE 43c720 |
0x43ca70 VMOVUPD %YMM14,0x180(%RSP) |
0x43ca79 VMOVUPD %YMM13,0x1a0(%RSP) |
0x43ca82 VMOVUPD %YMM12,0x1c0(%RSP) |
0x43ca8b VMOVUPD %YMM31,0x2c0(%RSP) |
0x43ca93 VMOVUPD %YMM30,0x2e0(%RSP) |
0x43ca9b VMOVAPD %YMM27,%YMM12 |
0x43caa1 VMOVAPD %YMM28,%YMM29 |
0x43caa7 VMOVAPD %YMM24,%YMM31 |
0x43caad VMOVAPD %YMM23,%YMM30 |
0x43cab3 VMOVAPD %YMM25,%YMM28 |
0x43cab9 VBROADCASTSD %XMM17,%YMM20 |
0x43cabf VBROADCASTSD %XMM19,%YMM19 |
0x43cac5 VBROADCASTSD %XMM16,%YMM17 |
0x43cacb VBROADCASTSD %XMM18,%YMM18 |
0x43cad1 VBROADCASTSD %XMM21,%YMM16 |
0x43cad7 VBROADCASTSD %XMM15,%YMM15 |
0x43cadc MOV 0x40(%RSP),%R9 |
0x43cae1 IMUL %R12,%R9 |
0x43cae5 ADD 0x160(%RSP),%R9 |
0x43caed XOR %R12D,%R12D |
(880) 0x43caf0 VMOVUPD (%RAX,%R12,8),%YMM22 |
(880) 0x43caf7 VMOVUPD (%RDX,%R12,8),%YMM23 |
(880) 0x43cafe VMOVUPD (%RSI,%R12,8),%YMM24 |
(880) 0x43cb05 VMOVUPD (%R15,%R12,8),%YMM25 |
(880) 0x43cb0c VMULPD %YMM0,%YMM22,%YMM21 |
(880) 0x43cb12 VFMADD231PD %YMM1,%YMM23,%YMM21 |
(880) 0x43cb18 VFMADD231PD %YMM2,%YMM24,%YMM21 |
(880) 0x43cb1e VFMADD231PD %YMM25,%YMM3,%YMM21 |
(880) 0x43cb24 VMULPD %YMM4,%YMM22,%YMM26 |
(880) 0x43cb2a VMULPD %YMM5,%YMM23,%YMM27 |
(880) 0x43cb30 VFMADD231PD %YMM7,%YMM25,%YMM27 |
(880) 0x43cb36 VFMADD231PD %YMM6,%YMM24,%YMM26 |
(880) 0x43cb3c VFMADD231PD %YMM27,%YMM8,%YMM26 |
(880) 0x43cb42 LEA (%R13,%R12,8),%RCX |
(880) 0x43cb47 VMOVUPD (%R13,%R12,8),%YMM27 |
(880) 0x43cb4f VFMADD231PD %YMM21,%YMM20,%YMM27 |
(880) 0x43cb55 VMOVUPD %YMM27,(%R13,%R12,8) |
(880) 0x43cb5d VMOVUPD (%R11,%RCX,1),%YMM27 |
(880) 0x43cb64 VFMADD231PD %YMM21,%YMM19,%YMM27 |
(880) 0x43cb6a VMOVUPD %YMM27,(%R11,%RCX,1) |
(880) 0x43cb71 VMULPD %YMM9,%YMM22,%YMM22 |
(880) 0x43cb77 LEA (%RCX,%R11,1),%RCX |
(880) 0x43cb7b VMOVUPD (%R11,%RCX,1),%YMM27 |
(880) 0x43cb82 VFMADD231PD %YMM17,%YMM26,%YMM27 |
(880) 0x43cb88 VMOVUPD %YMM27,(%R11,%RCX,1) |
(880) 0x43cb8f VFMADD231PD %YMM23,%YMM10,%YMM22 |
(880) 0x43cb95 LEA (%RCX,%R11,1),%RCX |
(880) 0x43cb99 VMOVUPD (%R11,%RCX,1),%YMM23 |
(880) 0x43cba0 VFMADD231PD %YMM21,%YMM18,%YMM23 |
(880) 0x43cba6 VMOVUPD %YMM23,(%R11,%RCX,1) |
(880) 0x43cbad VFMADD231PD %YMM24,%YMM11,%YMM22 |
(880) 0x43cbb3 LEA (%RCX,%R11,1),%RCX |
(880) 0x43cbb7 VMOVUPD (%R11,%RCX,1),%YMM23 |
(880) 0x43cbbe VFMADD231PD %YMM16,%YMM26,%YMM23 |
(880) 0x43cbc4 VMOVUPD %YMM23,(%R11,%RCX,1) |
(880) 0x43cbcb VFMADD231PD %YMM25,%YMM8,%YMM22 |
(880) 0x43cbd1 LEA (%RCX,%R11,1),%RCX |
(880) 0x43cbd5 VFMADD213PD (%R11,%RCX,1),%YMM15,%YMM22 |
(880) 0x43cbdc VMOVUPD %YMM22,(%R11,%RCX,1) |
(880) 0x43cbe3 MOV 0x98(%RSP),%RCX |
(880) 0x43cbeb VMOVUPD (%RBX,%R12,8),%YMM22 |
(880) 0x43cbf2 VFMADD231PD %YMM17,%YMM21,%YMM22 |
(880) 0x43cbf8 VMOVUPD %YMM22,(%RBX,%R12,8) |
(880) 0x43cbff VMOVUPD (%R8,%R12,8),%YMM22 |
(880) 0x43cc06 VFMADD231PD %YMM16,%YMM21,%YMM22 |
(880) 0x43cc0c VMOVUPD %YMM22,(%R8,%R12,8) |
(880) 0x43cc13 VFMADD213PD (%R10,%R12,8),%YMM15,%YMM26 |
(880) 0x43cc1a VMOVUPD %YMM26,(%R10,%R12,8) |
(880) 0x43cc21 VFMADD213PD (%R14,%R12,8),%YMM15,%YMM21 |
(880) 0x43cc28 VMOVUPD %YMM21,(%R14,%R12,8) |
(880) 0x43cc2f ADD $0x4,%R12 |
(880) 0x43cc33 CMP %RCX,%R12 |
(880) 0x43cc36 JL 43caf0 |
0x43cc3c MOV %RCX,%R12 |
0x43cc3f CMP %RDI,%RCX |
0x43cc42 VMOVAPD %YMM28,%YMM25 |
0x43cc48 VMOVAPD %YMM30,%YMM23 |
0x43cc4e VMOVAPD %YMM31,%YMM24 |
0x43cc54 VMOVAPD %YMM29,%YMM28 |
0x43cc5a VMOVAPD %YMM12,%YMM27 |
0x43cc60 VMOVUPD 0x2e0(%RSP),%YMM30 |
0x43cc68 VMOVUPD 0x2c0(%RSP),%YMM31 |
0x43cc70 VMOVUPD 0x1c0(%RSP),%YMM12 |
0x43cc79 VMOVUPD 0x1a0(%RSP),%YMM13 |
0x43cc82 VMOVUPD 0x180(%RSP),%YMM14 |
0x43cc8b JNE 43c757 |
0x43cc91 JMP 43c9d7 |
/scratch_na/users/xoserete/qaas_runs/171-417-8059/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/TinyVector.h: 61 - 61 |
-------------------------------------------------------------------------------- |
61: for (size_t d = 0; d < D; ++d) |
/scratch_na/users/xoserete/qaas_runs/171-417-8059/intel/miniqmc/build/miniqmc/src/Numerics/Spline2/MultiBsplineRef.hpp: 227 - 262 |
-------------------------------------------------------------------------------- |
227: for (int j = 0; j < 4; j++) |
[...] |
234: const T pre20 = d2a[i] * b[j]; |
235: const T pre10 = da[i] * b[j]; |
236: const T pre00 = a[i] * b[j]; |
237: const T pre11 = da[i] * db[j]; |
238: const T pre01 = a[i] * db[j]; |
239: const T pre02 = a[i] * d2b[j]; |
240: |
241: const int iSplitPoint = num_splines; |
242: for (int n = 0; n < iSplitPoint; n++) |
243: { |
244: T coefsv = coefs[n]; |
245: T coefsvzs = coefszs[n]; |
246: T coefsv2zs = coefs2zs[n]; |
247: T coefsv3zs = coefs3zs[n]; |
248: |
249: T sum0 = c[0] * coefsv + c[1] * coefsvzs + c[2] * coefsv2zs + c[3] * coefsv3zs; |
250: T sum1 = dc[0] * coefsv + dc[1] * coefsvzs + dc[2] * coefsv2zs + dc[3] * coefsv3zs; |
251: T sum2 = d2c[0] * coefsv + d2c[1] * coefsvzs + d2c[2] * coefsv2zs + d2c[3] * coefsv3zs; |
252: |
253: hxx[n] += pre20 * sum0; |
254: hxy[n] += pre11 * sum0; |
255: hxz[n] += pre10 * sum1; |
256: hyy[n] += pre02 * sum0; |
257: hyz[n] += pre01 * sum1; |
258: hzz[n] += pre00 * sum2; |
259: gx[n] += pre10 * sum0; |
260: gy[n] += pre01 * sum0; |
261: gz[n] += pre00 * sum1; |
262: vals[n] += pre00 * sum0; |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►88.56+ | miniqmcreference::DiracDetermi[...] | DiracDeterminantRef.cpp:100 | exec |
○ | qmcplusplus::WaveFunction::rat[...] | WaveFunction.cpp:196 | exec |
○ | main.extracted.110 | refwrap.h:313 | exec |
○ | __kmp_invoke_microtask | libiomp5.so | |
○ | __kmp_invoke_task_func | libiomp5.so | |
►6.78+ | qmcplusplus::SPOSet::evaluate_[...] | OhmmsVector.h:144 | exec |
○ | miniqmcreference::DiracDetermi[...] | DiracDeterminantRef.cpp:263 | exec |
○ | miniqmcreference::DiracDetermi[...] | DiracDeterminantRef.cpp:238 | exec |
○ | qmcplusplus::WaveFunction::eva[...] | WaveFunction.cpp:171 | exec |
○ | main.extracted.113 | miniqmc.cpp:397 | exec |
○ | __kmp_invoke_microtask | libiomp5.so | |
○ | __kmp_invoke_task_func | libiomp5.so | |
►4.66+ | qmcplusplus::SPOSet::evaluate_[...] | OhmmsVector.h:144 | exec |
○ | miniqmcreference::DiracDetermi[...] | DiracDeterminantRef.cpp:263 | exec |
○ | miniqmcreference::DiracDetermi[...] | DiracDeterminantRef.cpp:238 | exec |
○ | qmcplusplus::WaveFunction::eva[...] | WaveFunction.cpp:170 | exec |
○ | main.extracted.113 | miniqmc.cpp:397 | exec |
○ | __kmp_invoke_microtask | libiomp5.so | |
○ | __kmp_invoke_task_func | libiomp5.so |
Path / |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.22 |
CQA speedup if FP arith vectorized | 1.81 |
CQA speedup if fully vectorized | 4.16 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.60 |
Bottlenecks | |
Function | miniqmcreference::einspline_spo_ref |
Source | TinyVector.h:61-61,MultiBsplineRef.hpp:227-227,MultiBsplineRef.hpp:234-239,MultiBsplineRef.hpp:242-262 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 15.46 |
CQA cycles if no scalar integer | 12.67 |
CQA cycles if FP arith vectorized | 8.52 |
CQA cycles if fully vectorized | 3.72 |
Front-end cycles | 15.46 |
DIV/SQRT cycles | 9.10 |
P0 cycles | 9.10 |
P1 cycles | 9.58 |
P2 cycles | 9.58 |
P3 cycles | 5.25 |
P4 cycles | 5.55 |
P5 cycles | 5.15 |
P6 cycles | 5.25 |
P7 cycles | 5.25 |
P8 cycles | 5.25 |
P9 cycles | 5.10 |
P10 cycles | 9.58 |
P11 cycles | 0.00 |
Inter-iter dependencies cycles | 0 - 2 |
FE+BE cycles (UFS) | 15.68 |
Stall cycles (UFS) | 0.00 |
Nb insns | 93.00 |
Nb uops | 92.00 |
Nb loads | 29.25 |
Nb stores | 10.50 |
Nb stack references | 18.25 |
FLOP/cycle | 5.73 |
Nb FLOP add-sub | 0.00 |
Nb FLOP mul | 12.50 |
Nb FLOP fma | 38.00 |
Nb FLOP div | 0.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 44.02 |
Bytes prefetched | 0.00 |
Bytes loaded | 522.00 |
Bytes stored | 312.00 |
Stride 0 | 1.50 |
Stride 1 | 0.00 |
Stride n | 0.00 |
Stride unknown | 9.00 |
Stride indirect | 0.00 |
Vectorization ratio all | 49.25 |
Vectorization ratio load | 47.93 |
Vectorization ratio store | 67.92 |
Vectorization ratio mul | 26.67 |
Vectorization ratio add_sub | 0.00 |
Vectorization ratio fma | 100.00 |
Vectorization ratio div_sqrt | NA |
Vectorization ratio other | 49.56 |
Vector-efficiency ratio all | 30.91 |
Vector-efficiency ratio load | 30.47 |
Vector-efficiency ratio store | 37.97 |
Vector-efficiency ratio mul | 22.50 |
Vector-efficiency ratio add_sub | 12.50 |
Vector-efficiency ratio fma | 50.00 |
Vector-efficiency ratio div_sqrt | NA |
Vector-efficiency ratio other | 30.91 |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.00 |
CQA speedup if FP arith vectorized | 1.00 |
CQA speedup if fully vectorized | 8.00 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.55 |
Bottlenecks | micro-operation queue, |
Function | miniqmcreference::einspline_spo_ref |
Source | TinyVector.h:61-61,MultiBsplineRef.hpp:227-227,MultiBsplineRef.hpp:234-239,MultiBsplineRef.hpp:242-262 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 2.17 |
CQA cycles if no scalar integer | 2.17 |
CQA cycles if FP arith vectorized | 2.17 |
CQA cycles if fully vectorized | 0.27 |
Front-end cycles | 2.17 |
DIV/SQRT cycles | 1.40 |
P0 cycles | 1.40 |
P1 cycles | 1.33 |
P2 cycles | 1.33 |
P3 cycles | 0.50 |
P4 cycles | 1.40 |
P5 cycles | 1.40 |
P6 cycles | 0.50 |
P7 cycles | 0.50 |
P8 cycles | 0.50 |
P9 cycles | 1.40 |
P10 cycles | 1.33 |
P11 cycles | 0.00 |
Inter-iter dependencies cycles | 0 - 2 |
FE+BE cycles (UFS) | 2.26 |
Stall cycles (UFS) | 0.00 |
Nb insns | 14.00 |
Nb uops | 13.00 |
Nb loads | 4.00 |
Nb stores | 1.00 |
Nb stack references | 4.00 |
FLOP/cycle | 0.00 |
Nb FLOP add-sub | 0.00 |
Nb FLOP mul | 0.00 |
Nb FLOP fma | 0.00 |
Nb FLOP div | 0.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 18.46 |
Bytes prefetched | 0.00 |
Bytes loaded | 32.00 |
Bytes stored | 8.00 |
Stride 0 | 1.00 |
Stride 1 | 0.00 |
Stride n | 0.00 |
Stride unknown | 0.00 |
Stride indirect | 0.00 |
Vectorization ratio all | 0.00 |
Vectorization ratio load | 0.00 |
Vectorization ratio store | 0.00 |
Vectorization ratio mul | NA |
Vectorization ratio add_sub | 0.00 |
Vectorization ratio fma | NA |
Vectorization ratio div_sqrt | NA |
Vectorization ratio other | 0.00 |
Vector-efficiency ratio all | 12.50 |
Vector-efficiency ratio load | 12.50 |
Vector-efficiency ratio store | 12.50 |
Vector-efficiency ratio mul | NA |
Vector-efficiency ratio add_sub | 12.50 |
Vector-efficiency ratio fma | NA |
Vector-efficiency ratio div_sqrt | NA |
Vector-efficiency ratio other | 12.50 |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.25 |
CQA speedup if FP arith vectorized | 1.92 |
CQA speedup if fully vectorized | 3.66 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.51 |
Bottlenecks | micro-operation queue, |
Function | miniqmcreference::einspline_spo_ref |
Source | TinyVector.h:61-61,MultiBsplineRef.hpp:227-227,MultiBsplineRef.hpp:234-239,MultiBsplineRef.hpp:242-262 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 22.67 |
CQA cycles if no scalar integer | 18.17 |
CQA cycles if FP arith vectorized | 11.79 |
CQA cycles if fully vectorized | 6.19 |
Front-end cycles | 22.67 |
DIV/SQRT cycles | 15.00 |
P0 cycles | 15.00 |
P1 cycles | 14.67 |
P2 cycles | 14.67 |
P3 cycles | 7.50 |
P4 cycles | 7.00 |
P5 cycles | 6.60 |
P6 cycles | 7.50 |
P7 cycles | 7.50 |
P8 cycles | 7.50 |
P9 cycles | 6.40 |
P10 cycles | 14.67 |
P11 cycles | 0.00 |
Inter-iter dependencies cycles | 0 - 2 |
FE+BE cycles (UFS) | 22.97 |
Stall cycles (UFS) | 0.00 |
Nb insns | 137.00 |
Nb uops | 135.00 |
Nb loads | 45.00 |
Nb stores | 15.00 |
Nb stack references | 25.00 |
FLOP/cycle | 7.68 |
Nb FLOP add-sub | 0.00 |
Nb FLOP mul | 22.00 |
Nb FLOP fma | 76.00 |
Nb FLOP div | 0.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 56.12 |
Bytes prefetched | 0.00 |
Bytes loaded | 816.00 |
Bytes stored | 456.00 |
Stride 0 | 2.00 |
Stride 1 | 0.00 |
Stride n | 0.00 |
Stride unknown | 9.00 |
Stride indirect | 0.00 |
Vectorization ratio all | 76.00 |
Vectorization ratio load | 76.00 |
Vectorization ratio store | 93.33 |
Vectorization ratio mul | 40.00 |
Vectorization ratio add_sub | 0.00 |
Vectorization ratio fma | 100.00 |
Vectorization ratio div_sqrt | NA |
Vectorization ratio other | 70.00 |
Vector-efficiency ratio all | 40.94 |
Vector-efficiency ratio load | 41.00 |
Vector-efficiency ratio store | 47.50 |
Vector-efficiency ratio mul | 27.50 |
Vector-efficiency ratio add_sub | 12.50 |
Vector-efficiency ratio fma | 50.00 |
Vector-efficiency ratio div_sqrt | NA |
Vector-efficiency ratio other | 38.54 |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.41 |
CQA speedup if FP arith vectorized | 1.67 |
CQA speedup if fully vectorized | 6.18 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.72 |
Bottlenecks | micro-operation queue, |
Function | miniqmcreference::einspline_spo_ref |
Source | TinyVector.h:61-61,MultiBsplineRef.hpp:227-227,MultiBsplineRef.hpp:234-239,MultiBsplineRef.hpp:242-262 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 10.33 |
CQA cycles if no scalar integer | 7.33 |
CQA cycles if FP arith vectorized | 6.18 |
CQA cycles if fully vectorized | 1.67 |
Front-end cycles | 10.33 |
DIV/SQRT cycles | 5.00 |
P0 cycles | 5.00 |
P1 cycles | 6.00 |
P2 cycles | 6.00 |
P3 cycles | 3.00 |
P4 cycles | 6.00 |
P5 cycles | 5.00 |
P6 cycles | 3.00 |
P7 cycles | 3.00 |
P8 cycles | 3.00 |
P9 cycles | 5.00 |
P10 cycles | 6.00 |
P11 cycles | 0.00 |
Inter-iter dependencies cycles | 0 - 2 |
FE+BE cycles (UFS) | 10.54 |
Stall cycles (UFS) | 0.00 |
Nb insns | 61.00 |
Nb uops | 61.00 |
Nb loads | 18.00 |
Nb stores | 6.00 |
Nb stack references | 14.00 |
FLOP/cycle | 0.58 |
Nb FLOP add-sub | 0.00 |
Nb FLOP mul | 6.00 |
Nb FLOP fma | 0.00 |
Nb FLOP div | 0.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 41.81 |
Bytes prefetched | 0.00 |
Bytes loaded | 264.00 |
Bytes stored | 168.00 |
Stride 0 | 1.00 |
Stride 1 | 0.00 |
Stride n | 0.00 |
Stride unknown | 11.00 |
Stride indirect | 0.00 |
Vectorization ratio all | 41.67 |
Vectorization ratio load | 35.71 |
Vectorization ratio store | 83.33 |
Vectorization ratio mul | 0.00 |
Vectorization ratio add_sub | 0.00 |
Vectorization ratio fma | NA |
Vectorization ratio div_sqrt | NA |
Vectorization ratio other | 52.63 |
Vector-efficiency ratio all | 27.99 |
Vector-efficiency ratio load | 25.89 |
Vector-efficiency ratio store | 43.75 |
Vector-efficiency ratio mul | 12.50 |
Vector-efficiency ratio add_sub | 12.50 |
Vector-efficiency ratio fma | NA |
Vector-efficiency ratio div_sqrt | NA |
Vector-efficiency ratio other | 31.91 |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.16 |
CQA speedup if FP arith vectorized | 1.91 |
CQA speedup if fully vectorized | 3.96 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.63 |
Bottlenecks | micro-operation queue, |
Function | miniqmcreference::einspline_spo_ref |
Source | TinyVector.h:61-61,MultiBsplineRef.hpp:227-227,MultiBsplineRef.hpp:234-239,MultiBsplineRef.hpp:242-262 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 26.67 |
CQA cycles if no scalar integer | 23.00 |
CQA cycles if FP arith vectorized | 13.96 |
CQA cycles if fully vectorized | 6.73 |
Front-end cycles | 26.67 |
DIV/SQRT cycles | 15.00 |
P0 cycles | 15.00 |
P1 cycles | 16.33 |
P2 cycles | 16.33 |
P3 cycles | 10.00 |
P4 cycles | 7.80 |
P5 cycles | 7.60 |
P6 cycles | 10.00 |
P7 cycles | 10.00 |
P8 cycles | 10.00 |
P9 cycles | 7.60 |
P10 cycles | 16.33 |
P11 cycles | 0.00 |
Inter-iter dependencies cycles | 0 - 2 |
FE+BE cycles (UFS) | 26.95 |
Stall cycles (UFS) | 0.00 |
Nb insns | 160.00 |
Nb uops | 159.00 |
Nb loads | 50.00 |
Nb stores | 20.00 |
Nb stack references | 30.00 |
FLOP/cycle | 6.52 |
Nb FLOP add-sub | 0.00 |
Nb FLOP mul | 22.00 |
Nb FLOP fma | 76.00 |
Nb FLOP div | 0.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 59.70 |
Bytes prefetched | 0.00 |
Bytes loaded | 976.00 |
Bytes stored | 616.00 |
Stride 0 | 2.00 |
Stride 1 | 0.00 |
Stride n | 0.00 |
Stride unknown | 16.00 |
Stride indirect | 0.00 |
Vectorization ratio all | 79.34 |
Vectorization ratio load | 80.00 |
Vectorization ratio store | 95.00 |
Vectorization ratio mul | 40.00 |
Vectorization ratio add_sub | 0.00 |
Vectorization ratio fma | 100.00 |
Vectorization ratio div_sqrt | NA |
Vectorization ratio other | 75.61 |
Vector-efficiency ratio all | 42.20 |
Vector-efficiency ratio load | 42.50 |
Vector-efficiency ratio store | 48.13 |
Vector-efficiency ratio mul | 27.50 |
Vector-efficiency ratio add_sub | 12.50 |
Vector-efficiency ratio fma | 50.00 |
Vector-efficiency ratio div_sqrt | NA |
Vector-efficiency ratio other | 40.70 |
Path / |
Function | miniqmcreference::einspline_spo_ref |
Source file and lines | MultiBsplineRef.hpp:227-262 |
Module | exec |
nb instructions | 93 |
nb uops | 92 |
loop length | 572.50 |
used x86 registers | 10.50 |
used mmx registers | 0 |
used xmm registers | 5.25 |
used ymm registers | 20.25 |
used zmm registers | 0 |
nb stack references | 18.25 |
micro-operation queue | 15.46 cycles |
front end | 15.46 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 9.10 | 9.10 | 9.58 | 9.58 | 5.25 | 5.55 | 5.15 | 5.25 | 5.25 | 5.25 | 5.10 | 9.58 |
cycles | 9.10 | 9.10 | 9.58 | 9.58 | 5.25 | 5.55 | 5.15 | 5.25 | 5.25 | 5.25 | 5.10 | 9.58 |
Cycles executing div or sqrt instructions | NA |
Longest recurrence chain latency (RecMII) | 0.00-2.00 |
FE+BE cycles | 15.68 |
Stall cycles | 0.00 |
Front-end | 15.46 |
Dispatch | 9.68 |
Data deps. | 0.00-2.00 |
Overall L1 | 15.46 |
all | 5% |
load | 50% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 11% |
all | 74% |
load | 66% |
store | 100% |
mul | 26% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | 100% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 74% |
all | 49% |
load | 47% |
store | 67% |
mul | 26% |
add-sub | 0% |
fma | 100% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 49% |
all | 14% |
load | 31% |
store | 12% |
mul | 12% |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 15% |
all | 40% |
load | 37% |
store | 50% |
mul | 22% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | 50% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 40% |
all | 30% |
load | 30% |
store | 37% |
mul | 22% |
add-sub | 12% |
fma | 50% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 30% |
Function | miniqmcreference::einspline_spo_ref |
Source file and lines | MultiBsplineRef.hpp:227-262 |
Module | exec |
nb instructions | 14 |
nb uops | 13 |
loop length | 65 |
used x86 registers | 9 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 4 |
micro-operation queue | 2.17 cycles |
front end | 2.17 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 1.40 | 1.40 | 1.33 | 1.33 | 0.50 | 1.40 | 1.40 | 0.50 | 0.50 | 0.50 | 1.40 | 1.33 |
cycles | 1.40 | 1.40 | 1.33 | 1.33 | 0.50 | 1.40 | 1.40 | 0.50 | 0.50 | 0.50 | 1.40 | 1.33 |
Cycles executing div or sqrt instructions | NA |
Longest recurrence chain latency (RecMII) | 0.00-2.00 |
FE+BE cycles | 2.26 |
Stall cycles | 0.00 |
Front-end | 2.17 |
Dispatch | 1.40 |
Data deps. | 0.00-2.00 |
Overall L1 | 2.17 |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 12% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
MOV 0x18(%RSP),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x1(%R12),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x90(%RSP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD %R9,%R15 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %R9,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %R9,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %R9,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP $0x3,%R12 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RCX,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x98(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x88(%RSP),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JE 43c650 <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEERNS6_INS2_10TinyVectorIdLj3EEESaISB_EEES9_+0xa80> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
TEST %EDI,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 43c9d7 <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEERNS6_INS2_10TinyVectorIdLj3EEESaISB_EEES9_+0xe07> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
Function | miniqmcreference::einspline_spo_ref |
Source file and lines | MultiBsplineRef.hpp:227-262 |
Module | exec |
nb instructions | 137 |
nb uops | 135 |
loop length | 848 |
used x86 registers | 12 |
used mmx registers | 0 |
used xmm registers | 7 |
used ymm registers | 32 |
used zmm registers | 0 |
nb stack references | 25 |
micro-operation queue | 22.67 cycles |
front end | 22.67 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 15.00 | 15.00 | 14.67 | 14.67 | 7.50 | 7.00 | 6.60 | 7.50 | 7.50 | 7.50 | 6.40 | 14.67 |
cycles | 15.00 | 15.00 | 14.67 | 14.67 | 7.50 | 7.00 | 6.60 | 7.50 | 7.50 | 7.50 | 6.40 | 14.67 |
Cycles executing div or sqrt instructions | NA |
Longest recurrence chain latency (RecMII) | 0.00-2.00 |
FE+BE cycles | 22.97 |
Stall cycles | 0.00 |
Front-end | 22.67 |
Dispatch | 15.00 |
Data deps. | 0.00-2.00 |
Overall L1 | 22.67 |
all | 11% |
load | 100% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 25% |
all | 82% |
load | 75% |
store | 100% |
mul | 40% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | 100% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 76% |
all | 76% |
load | 76% |
store | 93% |
mul | 40% |
add-sub | 0% |
fma | 100% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 70% |
all | 15% |
load | 50% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 20% |
all | 43% |
load | 40% |
store | 50% |
mul | 27% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | 50% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 41% |
all | 40% |
load | 41% |
store | 47% |
mul | 27% |
add-sub | 12% |
fma | 50% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 38% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
MOV 0x40(%RSP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
IMUL %R12,%R9 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD 0x160(%RSP),%R9 | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
VBROADCASTSD %XMM17,%YMM20 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM19,%YMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM16,%YMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM18,%YMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM21,%YMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM15,%YMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
XOR %R12D,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x30(%RSP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %R12,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VPBROADCASTQ %RDI,%YMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPCMPNLEUQ 0xbfd70(%RIP),%YMM21,%K1 | |||||||||||||||
ADD %R12,%R9 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x60(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RCX,%R9,1),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0xc0(%RSP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVUPD (%RBX,%RDI,8),%YMM21{%K1}{z} | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
MOV 0xe0(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RCX,%R9,1),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVUPD (%RBX,%RDI,8),%YMM22{%K1}{z} | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVAPD %YMM24,%YMM29 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMOVUPD 0x4e0(%RSP),%YMM24 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVAPD %YMM21,%YMM24{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMOVAPD %YMM23,%YMM26 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMOVUPD 0x500(%RSP),%YMM23 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVAPD %YMM22,%YMM23{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
MOV 0xd0(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RCX,%R9,1),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVUPD (%RBX,%RDI,8),%YMM21{%K1}{z} | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVAPD %YMM21,%YMM14{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
ADD 0x140(%RSP),%R9 | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
VMOVUPD (%RBX,%R9,8),%YMM21{%K1}{z} | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
MOV 0x138(%RSP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVAPD %YMM21,%YMM13{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMULPD %YMM0,%YMM24,%YMM21 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM1,%YMM23,%YMM21 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM2,%YMM14,%YMM21 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM13,%YMM3,%YMM21 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVUPD (%R13,%R12,8),%YMM22{%K1}{z} | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVAPD %YMM22,%YMM12{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMULPD %YMM4,%YMM24,%YMM22 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD %YMM12,%YMM21,%YMM20 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVUPD %YMM20,(%R13,%R12,8){%K1} | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VMULPD %YMM5,%YMM23,%YMM20 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM7,%YMM13,%YMM20 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM6,%YMM14,%YMM22 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM20,%YMM8,%YMM22 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV 0x10(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RCX,%R12,1),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVUPD (%R13,%R9,8),%YMM20{%K1}{z} | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVAPD %YMM20,%YMM31{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VFMADD213PD %YMM31,%YMM21,%YMM19 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVUPD %YMM19,(%R13,%R9,8){%K1} | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
MOV 0x80(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RCX,%R12,1),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVUPD (%R13,%RDI,8),%YMM19{%K1}{z} | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVAPD %YMM19,%YMM30{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMOVAPD %YMM17,%YMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VFMADD213PD %YMM30,%YMM22,%YMM19 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVUPD %YMM19,(%R13,%RDI,8){%K1} | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
MOV 0x70(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RCX,%R12,1),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVUPD (%R13,%RCX,8),%YMM19{%K1}{z} | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x520(%RSP),%YMM20 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVAPD %YMM19,%YMM20{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMOVUPD %YMM20,0x520(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VFMADD213PD %YMM20,%YMM21,%YMM18 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVUPD %YMM18,(%R13,%RCX,8){%K1} | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
MOV 0x78(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RCX,%R12,1),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVUPD (%R13,%RCX,8),%YMM18{%K1}{z} | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x540(%RSP),%YMM19 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVAPD %YMM18,%YMM19{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMOVAPD %YMM16,%YMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMOVUPD %YMM19,0x540(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VFMADD213PD %YMM19,%YMM22,%YMM18 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVUPD %YMM18,(%R13,%RCX,8){%K1} | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
MOV 0x68(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RCX,%R12,1),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVUPD (%R13,%RCX,8),%YMM18{%K1}{z} | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVAPD %YMM18,%YMM27{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMOVUPD %YMM24,0x4e0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VMULPD %YMM9,%YMM24,%YMM18 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD %YMM29,%YMM24 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMOVUPD %YMM23,0x500(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VFMADD231PD %YMM10,%YMM23,%YMM18 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD %YMM26,%YMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VFMADD231PD %YMM11,%YMM14,%YMM18 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM8,%YMM13,%YMM18 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD %YMM27,%YMM15,%YMM18 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVUPD %YMM18,(%R13,%RCX,8){%K1} | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VMOVUPD (%RBX,%R12,8),%YMM18{%K1}{z} | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVAPD %YMM18,%YMM28{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VFMADD213PD %YMM28,%YMM21,%YMM17 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVUPD %YMM17,(%RBX,%R12,8){%K1} | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VMOVUPD (%RBX,%R9,8),%YMM17{%K1}{z} | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVAPD %YMM17,%YMM24{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VFMADD213PD %YMM24,%YMM21,%YMM16 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVUPD %YMM16,(%RBX,%R9,8){%K1} | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VMOVUPD (%RBX,%RDI,8),%YMM16{%K1}{z} | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVAPD %YMM16,%YMM23{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VFMADD213PD %YMM23,%YMM15,%YMM22 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVUPD %YMM22,(%RBX,%RDI,8){%K1} | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
MOV 0x30(%RSP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVUPD (%R14,%R12,8),%YMM16{%K1}{z} | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVAPD %YMM16,%YMM25{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VFMADD213PD %YMM25,%YMM15,%YMM21 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVUPD %YMM21,(%R14,%R12,8){%K1} | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
MOV 0x18(%RSP),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x1(%R12),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x90(%RSP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD %R9,%R15 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %R9,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %R9,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %R9,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP $0x3,%R12 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RCX,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x98(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x88(%RSP),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JE 43c650 <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEERNS6_INS2_10TinyVectorIdLj3EEESaISB_EEES9_+0xa80> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
TEST %EDI,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 43c9d7 <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEERNS6_INS2_10TinyVectorIdLj3EEESaISB_EEES9_+0xe07> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x18(%RSP),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x380(%RSP,%R12,8),%XMM15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMULSD 0x2a0(%RSP),%XMM15,%XMM17 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMOVSD 0x280(%RSP),%XMM19 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMULSD %XMM19,%XMM15,%XMM16 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVSD 0x260(%RSP),%XMM20 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMULSD %XMM20,%XMM15,%XMM15 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVSD 0x220(%RSP,%R12,8),%XMM18 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMULSD %XMM19,%XMM18,%XMM19 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULSD %XMM20,%XMM18,%XMM21 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULSD 0x1e0(%RSP,%R12,8),%XMM20,%XMM18 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
TEST %RCX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JE 43c720 <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEERNS6_INS2_10TinyVectorIdLj3EEESaISB_EEES9_+0xb50> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
Function | miniqmcreference::einspline_spo_ref |
Source file and lines | MultiBsplineRef.hpp:227-262 |
Module | exec |
nb instructions | 61 |
nb uops | 61 |
loop length | 371 |
used x86 registers | 9 |
used mmx registers | 0 |
used xmm registers | 7 |
used ymm registers | 17 |
used zmm registers | 0 |
nb stack references | 14 |
micro-operation queue | 10.33 cycles |
front end | 10.33 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 5.00 | 5.00 | 6.00 | 6.00 | 3.00 | 6.00 | 5.00 | 3.00 | 3.00 | 3.00 | 5.00 | 6.00 |
cycles | 5.00 | 5.00 | 6.00 | 6.00 | 3.00 | 6.00 | 5.00 | 3.00 | 3.00 | 3.00 | 5.00 | 6.00 |
Cycles executing div or sqrt instructions | NA |
Longest recurrence chain latency (RecMII) | 0.00-2.00 |
FE+BE cycles | 10.54 |
Stall cycles | 0.00 |
Front-end | 10.33 |
Dispatch | 6.00 |
Data deps. | 0.00-2.00 |
Overall L1 | 10.33 |
all | 0% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 55% |
load | 45% |
store | 100% |
mul | 0% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 62% |
all | 41% |
load | 35% |
store | 83% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 52% |
all | 11% |
load | 12% |
store | 12% |
mul | 12% |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 10% |
all | 33% |
load | 29% |
store | 50% |
mul | 12% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 35% |
all | 27% |
load | 25% |
store | 43% |
mul | 12% |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 31% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
MOV 0x18(%RSP),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x1(%R12),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x90(%RSP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD %R9,%R15 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %R9,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %R9,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %R9,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP $0x3,%R12 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RCX,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x98(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x88(%RSP),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JE 43c650 <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEERNS6_INS2_10TinyVectorIdLj3EEESaISB_EEES9_+0xa80> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
TEST %EDI,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 43c9d7 <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEERNS6_INS2_10TinyVectorIdLj3EEESaISB_EEES9_+0xe07> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x18(%RSP),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x380(%RSP,%R12,8),%XMM15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMULSD 0x2a0(%RSP),%XMM15,%XMM17 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMOVSD 0x280(%RSP),%XMM19 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMULSD %XMM19,%XMM15,%XMM16 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVSD 0x260(%RSP),%XMM20 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMULSD %XMM20,%XMM15,%XMM15 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVSD 0x220(%RSP,%R12,8),%XMM18 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMULSD %XMM19,%XMM18,%XMM19 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULSD %XMM20,%XMM18,%XMM21 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULSD 0x1e0(%RSP,%R12,8),%XMM20,%XMM18 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
TEST %RCX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JE 43c720 <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEERNS6_INS2_10TinyVectorIdLj3EEESaISB_EEES9_+0xb50> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VMOVUPD %YMM14,0x180(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VMOVUPD %YMM13,0x1a0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VMOVUPD %YMM12,0x1c0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VMOVUPD %YMM31,0x2c0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VMOVUPD %YMM30,0x2e0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VMOVAPD %YMM27,%YMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMOVAPD %YMM28,%YMM29 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMOVAPD %YMM24,%YMM31 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMOVAPD %YMM23,%YMM30 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMOVAPD %YMM25,%YMM28 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VBROADCASTSD %XMM17,%YMM20 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM19,%YMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM16,%YMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM18,%YMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM21,%YMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM15,%YMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV 0x40(%RSP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
IMUL %R12,%R9 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD 0x160(%RSP),%R9 | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
XOR %R12D,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RCX,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %RDI,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VMOVAPD %YMM28,%YMM25 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMOVAPD %YMM30,%YMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMOVAPD %YMM31,%YMM24 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMOVAPD %YMM29,%YMM28 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMOVAPD %YMM12,%YMM27 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMOVUPD 0x2e0(%RSP),%YMM30 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x2c0(%RSP),%YMM31 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x1c0(%RSP),%YMM12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x1a0(%RSP),%YMM13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x180(%RSP),%YMM14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
JNE 43c757 <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEERNS6_INS2_10TinyVectorIdLj3EEESaISB_EEES9_+0xb87> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
JMP 43c9d7 <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEERNS6_INS2_10TinyVectorIdLj3EEESaISB_EEES9_+0xe07> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
Function | miniqmcreference::einspline_spo_ref |
Source file and lines | MultiBsplineRef.hpp:227-262 |
Module | exec |
nb instructions | 160 |
nb uops | 159 |
loop length | 1006 |
used x86 registers | 12 |
used mmx registers | 0 |
used xmm registers | 7 |
used ymm registers | 32 |
used zmm registers | 0 |
nb stack references | 30 |
micro-operation queue | 26.67 cycles |
front end | 26.67 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 15.00 | 15.00 | 16.33 | 16.33 | 10.00 | 7.80 | 7.60 | 10.00 | 10.00 | 10.00 | 7.60 | 16.33 |
cycles | 15.00 | 15.00 | 16.33 | 16.33 | 10.00 | 7.80 | 7.60 | 10.00 | 10.00 | 10.00 | 7.60 | 16.33 |
Cycles executing div or sqrt instructions | NA |
Longest recurrence chain latency (RecMII) | 0.00-2.00 |
FE+BE cycles | 26.95 |
Stall cycles | 0.00 |
Front-end | 26.67 |
Dispatch | 16.33 |
Data deps. | 0.00-2.00 |
Overall L1 | 26.67 |
all | 10% |
load | 100% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 20% |
all | 85% |
load | 79% |
store | 100% |
mul | 40% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | 100% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 83% |
all | 79% |
load | 80% |
store | 95% |
mul | 40% |
add-sub | 0% |
fma | 100% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 75% |
all | 15% |
load | 50% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 18% |
all | 44% |
load | 42% |
store | 50% |
mul | 27% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | 50% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 43% |
all | 42% |
load | 42% |
store | 48% |
mul | 27% |
add-sub | 12% |
fma | 50% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 40% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
MOV 0x30(%RSP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %R12,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VPBROADCASTQ %RDI,%YMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPCMPNLEUQ 0xbfd70(%RIP),%YMM21,%K1 | |||||||||||||||
ADD %R12,%R9 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x60(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RCX,%R9,1),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0xc0(%RSP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVUPD (%RBX,%RDI,8),%YMM21{%K1}{z} | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
MOV 0xe0(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RCX,%R9,1),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVUPD (%RBX,%RDI,8),%YMM22{%K1}{z} | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVAPD %YMM24,%YMM29 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMOVUPD 0x4e0(%RSP),%YMM24 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVAPD %YMM21,%YMM24{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMOVAPD %YMM23,%YMM26 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMOVUPD 0x500(%RSP),%YMM23 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVAPD %YMM22,%YMM23{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
MOV 0xd0(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RCX,%R9,1),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVUPD (%RBX,%RDI,8),%YMM21{%K1}{z} | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVAPD %YMM21,%YMM14{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
ADD 0x140(%RSP),%R9 | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
VMOVUPD (%RBX,%R9,8),%YMM21{%K1}{z} | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
MOV 0x138(%RSP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVAPD %YMM21,%YMM13{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMULPD %YMM0,%YMM24,%YMM21 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM1,%YMM23,%YMM21 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM2,%YMM14,%YMM21 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM13,%YMM3,%YMM21 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVUPD (%R13,%R12,8),%YMM22{%K1}{z} | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVAPD %YMM22,%YMM12{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMULPD %YMM4,%YMM24,%YMM22 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD %YMM12,%YMM21,%YMM20 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVUPD %YMM20,(%R13,%R12,8){%K1} | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VMULPD %YMM5,%YMM23,%YMM20 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM7,%YMM13,%YMM20 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM6,%YMM14,%YMM22 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM20,%YMM8,%YMM22 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV 0x10(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RCX,%R12,1),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVUPD (%R13,%R9,8),%YMM20{%K1}{z} | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVAPD %YMM20,%YMM31{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VFMADD213PD %YMM31,%YMM21,%YMM19 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVUPD %YMM19,(%R13,%R9,8){%K1} | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
MOV 0x80(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RCX,%R12,1),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVUPD (%R13,%RDI,8),%YMM19{%K1}{z} | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVAPD %YMM19,%YMM30{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMOVAPD %YMM17,%YMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VFMADD213PD %YMM30,%YMM22,%YMM19 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVUPD %YMM19,(%R13,%RDI,8){%K1} | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
MOV 0x70(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RCX,%R12,1),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVUPD (%R13,%RCX,8),%YMM19{%K1}{z} | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x520(%RSP),%YMM20 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVAPD %YMM19,%YMM20{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMOVUPD %YMM20,0x520(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VFMADD213PD %YMM20,%YMM21,%YMM18 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVUPD %YMM18,(%R13,%RCX,8){%K1} | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
MOV 0x78(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RCX,%R12,1),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVUPD (%R13,%RCX,8),%YMM18{%K1}{z} | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x540(%RSP),%YMM19 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVAPD %YMM18,%YMM19{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMOVAPD %YMM16,%YMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMOVUPD %YMM19,0x540(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VFMADD213PD %YMM19,%YMM22,%YMM18 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVUPD %YMM18,(%R13,%RCX,8){%K1} | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
MOV 0x68(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RCX,%R12,1),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVUPD (%R13,%RCX,8),%YMM18{%K1}{z} | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVAPD %YMM18,%YMM27{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMOVUPD %YMM24,0x4e0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VMULPD %YMM9,%YMM24,%YMM18 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD %YMM29,%YMM24 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMOVUPD %YMM23,0x500(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VFMADD231PD %YMM10,%YMM23,%YMM18 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD %YMM26,%YMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VFMADD231PD %YMM11,%YMM14,%YMM18 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM8,%YMM13,%YMM18 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD %YMM27,%YMM15,%YMM18 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVUPD %YMM18,(%R13,%RCX,8){%K1} | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VMOVUPD (%RBX,%R12,8),%YMM18{%K1}{z} | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVAPD %YMM18,%YMM28{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VFMADD213PD %YMM28,%YMM21,%YMM17 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVUPD %YMM17,(%RBX,%R12,8){%K1} | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VMOVUPD (%RBX,%R9,8),%YMM17{%K1}{z} | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVAPD %YMM17,%YMM24{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VFMADD213PD %YMM24,%YMM21,%YMM16 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVUPD %YMM16,(%RBX,%R9,8){%K1} | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VMOVUPD (%RBX,%RDI,8),%YMM16{%K1}{z} | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVAPD %YMM16,%YMM23{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VFMADD213PD %YMM23,%YMM15,%YMM22 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVUPD %YMM22,(%RBX,%RDI,8){%K1} | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
MOV 0x30(%RSP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVUPD (%R14,%R12,8),%YMM16{%K1}{z} | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVAPD %YMM16,%YMM25{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VFMADD213PD %YMM25,%YMM15,%YMM21 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVUPD %YMM21,(%R14,%R12,8){%K1} | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
MOV 0x18(%RSP),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x1(%R12),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x90(%RSP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD %R9,%R15 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %R9,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %R9,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %R9,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP $0x3,%R12 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RCX,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x98(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x88(%RSP),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JE 43c650 <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEERNS6_INS2_10TinyVectorIdLj3EEESaISB_EEES9_+0xa80> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
TEST %EDI,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 43c9d7 <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEERNS6_INS2_10TinyVectorIdLj3EEESaISB_EEES9_+0xe07> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x18(%RSP),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x380(%RSP,%R12,8),%XMM15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMULSD 0x2a0(%RSP),%XMM15,%XMM17 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMOVSD 0x280(%RSP),%XMM19 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMULSD %XMM19,%XMM15,%XMM16 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVSD 0x260(%RSP),%XMM20 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMULSD %XMM20,%XMM15,%XMM15 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVSD 0x220(%RSP,%R12,8),%XMM18 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMULSD %XMM19,%XMM18,%XMM19 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULSD %XMM20,%XMM18,%XMM21 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULSD 0x1e0(%RSP,%R12,8),%XMM20,%XMM18 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
TEST %RCX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JE 43c720 <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEERNS6_INS2_10TinyVectorIdLj3EEESaISB_EEES9_+0xb50> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VMOVUPD %YMM14,0x180(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VMOVUPD %YMM13,0x1a0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VMOVUPD %YMM12,0x1c0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VMOVUPD %YMM31,0x2c0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VMOVUPD %YMM30,0x2e0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VMOVAPD %YMM27,%YMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMOVAPD %YMM28,%YMM29 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMOVAPD %YMM24,%YMM31 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMOVAPD %YMM23,%YMM30 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMOVAPD %YMM25,%YMM28 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VBROADCASTSD %XMM17,%YMM20 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM19,%YMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM16,%YMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM18,%YMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM21,%YMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM15,%YMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV 0x40(%RSP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
IMUL %R12,%R9 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD 0x160(%RSP),%R9 | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
XOR %R12D,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RCX,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %RDI,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VMOVAPD %YMM28,%YMM25 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMOVAPD %YMM30,%YMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMOVAPD %YMM31,%YMM24 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMOVAPD %YMM29,%YMM28 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMOVAPD %YMM12,%YMM27 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMOVUPD 0x2e0(%RSP),%YMM30 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x2c0(%RSP),%YMM31 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x1c0(%RSP),%YMM12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x1a0(%RSP),%YMM13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x180(%RSP),%YMM14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
JNE 43c757 <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEERNS6_INS2_10TinyVectorIdLj3EEESaISB_EEES9_+0xb87> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |