Loop Id: 390 | Module: exec | Source: BsplineFunctor.h:229-260 [...] | Coverage: 0.02% |
---|
Loop Id: 390 | Module: exec | Source: BsplineFunctor.h:229-260 [...] | Coverage: 0.02% |
---|
0x420a30 VXORPD %XMM18,%XMM18,%XMM18 |
0x420a36 VADDSD %XMM0,%XMM18,%XMM0 |
0x420a3c LEA 0x1(%R8),%RAX |
0x420a40 CMP %RCX,%R8 |
0x420a43 MOV %RAX,%R8 |
0x420a46 JE 4208cc |
0x420a4c MOV 0x268(%R14),%RSI |
0x420a53 MOV 0x18(%RSI),%RSI |
0x420a57 MOV (%RSI,%R8,4),%R13 |
0x420a5b MOV %R13,%RSI |
0x420a5e SHR $0x20,%RSI |
0x420a62 SUB %R13D,%ESI |
0x420a65 TEST %ESI,%ESI |
0x420a67 JLE 420a30 |
0x420a69 MOV 0x50(%RSP),%RAX |
0x420a6e LEA (%R8,%RAX,1),%RDI |
0x420a72 MOV 0x58(%RSP),%RAX |
0x420a77 MOV (%RAX,%RDI,8),%R9 |
0x420a7b VMOVSD 0x8(%R9),%XMM2 |
0x420a81 MOV %ESI,%R15D |
0x420a84 MOV %R15,%R11 |
0x420a87 MOVSXD %R13D,%R12 |
0x420a8a MOV $-0x10,%EAX |
0x420a8f AND %RAX,%R11 |
0x420a92 JE 420e00 |
0x420a98 MOV %RBX,%RCX |
0x420a9b VMOVQ %R13,%XMM3 |
0x420aa0 VBROADCASTSD %XMM2,%YMM4 |
0x420aa5 MOV 0x20(%RSP),%RAX |
0x420aaa LEA (%RAX,%R12,8),%RBX |
0x420aae XOR %R10D,%R10D |
0x420ab1 XOR %ESI,%ESI |
0x420ab3 NOPW %CS:(%RAX,%RAX,1) |
(393) 0x420ac0 VMOVUPD (%RBX,%RSI,8),%YMM5 |
(393) 0x420ac5 VMOVUPD 0x20(%RBX,%RSI,8),%YMM6 |
(393) 0x420acb VMOVUPD 0x40(%RBX,%RSI,8),%YMM7 |
(393) 0x420ad1 VMOVUPD 0x60(%RBX,%RSI,8),%YMM8 |
(393) 0x420ad7 VMOVD %ESI,%XMM9 |
(393) 0x420adb VPADDD %XMM3,%XMM9,%XMM9 |
(393) 0x420adf VPBROADCASTD %XMM9,%YMM9 |
(393) 0x420ae4 VPADDD %YMM28,%YMM9,%YMM10 |
(393) 0x420aea VPADDD %YMM29,%YMM9,%YMM9 |
(393) 0x420af0 VPCMPNEQD %YMM1,%YMM9,%K0 |
(393) 0x420af7 VPCMPNEQD %YMM1,%YMM10,%K2 |
(393) 0x420afe VCMPPD $0x1,%YMM4,%YMM5,%K5 |
(393) 0x420b05 VCMPPD $0x1,%YMM4,%YMM6,%K1 |
(393) 0x420b0c KMOVW %K1,0x60(%RSP) |
(393) 0x420b12 KSHIFTLB $0x4,%K1,%K3 |
(393) 0x420b18 KORB %K3,%K5,%K3 |
(393) 0x420b1c VCMPPD $0x1,%YMM4,%YMM7,%K6 |
(393) 0x420b23 VCMPPD $0x1,%YMM4,%YMM8,%K4 |
(393) 0x420b2a MOVSXD %R10D,%R10 |
(393) 0x420b2d LEA (%RDX,%R10,8),%RDI |
(393) 0x420b31 KANDB %K0,%K3,%K7 |
(393) 0x420b35 KMOVB %K7,%EAX |
(393) 0x420b39 KANDW %K2,%K6,%K7 |
(393) 0x420b3d POPCNT %EAX,%EAX |
(393) 0x420b41 VCOMPRESSPD %YMM7,(%RDI,%RAX,8){%K7} |
(393) 0x420b48 KANDW %K0,%K5,%K5 |
(393) 0x420b4c VCOMPRESSPD %YMM5,(%RDX,%R10,8){%K5} |
(393) 0x420b53 KSHIFTLB $0x4,%K4,%K1 |
(393) 0x420b59 KORB %K1,%K6,%K1 |
(393) 0x420b5d KMOVB %K7,0xe(%RSP) |
(393) 0x420b63 KSHIFTRB $0x4,%K2,%K6 |
(393) 0x420b69 KANDW %K6,%K4,%K4 |
(393) 0x420b6d LEA (%RDI,%RAX,8),%RAX |
(393) 0x420b71 MOVZX 0xe(%RSP),%R14D |
(393) 0x420b77 POPCNT %R14D,%R14D |
(393) 0x420b7c VCOMPRESSPD %YMM8,(%RAX,%R14,8){%K4} |
(393) 0x420b83 KUNPCKBW %K3,%K1,%K1 |
(393) 0x420b87 KMOVB %K5,0xf(%RSP) |
(393) 0x420b8d KUNPCKBW %K0,%K2,%K2 |
(393) 0x420b91 KSHIFTRB $0x4,%K0,%K0 |
(393) 0x420b97 KMOVW 0x60(%RSP),%K3 |
(393) 0x420b9d KANDW %K0,%K3,%K3 |
(393) 0x420ba1 MOVZX 0xf(%RSP),%EAX |
(393) 0x420ba6 POPCNT %EAX,%EAX |
(393) 0x420baa VCOMPRESSPD %YMM6,(%RDI,%RAX,8){%K3} |
(393) 0x420bb1 KANDW %K2,%K1,%K0 |
(393) 0x420bb5 KMOVW %K0,%EAX |
(393) 0x420bb9 POPCNT %EAX,%EAX |
(393) 0x420bbd ADD %EAX,%R10D |
(393) 0x420bc0 ADD $0x10,%RSI |
(393) 0x420bc4 CMP %R11,%RSI |
(393) 0x420bc7 JB 420ac0 |
0x420bcd CMP %R15,%R11 |
0x420bd0 MOV %RCX,%RBX |
0x420bd3 MOV 0x48(%RSP),%R14 |
0x420bd8 MOV 0x40(%RSP),%RCX |
0x420bdd JNE 420e06 |
0x420be3 TEST %R10D,%R10D |
0x420be6 JLE 420a30 |
0x420bec VMOVSD 0x238(%R9),%XMM3 |
0x420bf5 MOV 0x218(%R9),%R11 |
0x420bfc VMOVSD 0x18(%R9),%XMM18 |
0x420c03 VMOVSD 0x20(%R9),%XMM2 |
0x420c09 VMOVSD 0x28(%R9),%XMM6 |
0x420c0f VMOVSD 0x30(%R9),%XMM5 |
0x420c15 VMOVSD 0x38(%R9),%XMM17 |
0x420c1c VMOVSD 0x40(%R9),%XMM11 |
0x420c22 VMOVSD 0x48(%R9),%XMM13 |
0x420c28 VMOVSD 0x50(%R9),%XMM15 |
0x420c2e VMOVSD 0x58(%R9),%XMM7 |
0x420c34 VMOVSD 0x60(%R9),%XMM4 |
0x420c3a VMOVSD 0x68(%R9),%XMM10 |
0x420c40 VMOVSD 0x70(%R9),%XMM9 |
0x420c46 VMOVSD 0x78(%R9),%XMM12 |
0x420c4c VMOVSD 0x80(%R9),%XMM8 |
0x420c55 VMOVSD 0x88(%R9),%XMM14 |
0x420c5e VMOVSD 0x90(%R9),%XMM16 |
0x420c65 MOV %R10D,%R10D |
0x420c68 MOV %R10,%R9 |
0x420c6b MOV $-0x4,%EAX |
0x420c70 AND %RAX,%R9 |
0x420c73 JE 420e4b |
0x420c79 VMOVUPD %YMM25,0x80(%RSP) |
0x420c81 VMOVUPD %YMM24,0x60(%RSP) |
0x420c89 VBROADCASTSD %XMM3,%YMM19 |
0x420c8f VBROADCASTSD %XMM18,%YMM3 |
0x420c95 VBROADCASTSD %XMM2,%YMM2 |
0x420c9a VBROADCASTSD %XMM6,%YMM6 |
0x420c9f VBROADCASTSD %XMM5,%YMM5 |
0x420ca4 VPBROADCASTQ %R11,%YMM20 |
0x420caa VBROADCASTSD %XMM17,%YMM17 |
0x420cb0 VBROADCASTSD %XMM11,%YMM11 |
0x420cb5 VBROADCASTSD %XMM13,%YMM13 |
0x420cba VBROADCASTSD %XMM15,%YMM15 |
0x420cbf VBROADCASTSD %XMM7,%YMM7 |
0x420cc4 VBROADCASTSD %XMM4,%YMM4 |
0x420cc9 VBROADCASTSD %XMM10,%YMM10 |
0x420cce VBROADCASTSD %XMM9,%YMM9 |
0x420cd3 VBROADCASTSD %XMM12,%YMM12 |
0x420cd8 VBROADCASTSD %XMM8,%YMM8 |
0x420cdd VBROADCASTSD %XMM14,%YMM14 |
0x420ce2 VBROADCASTSD %XMM16,%YMM16 |
0x420ce8 VXORPD %XMM18,%XMM18,%XMM18 |
0x420cee XOR %ESI,%ESI |
(391) 0x420cf0 VMULPD (%RDX,%RSI,8),%YMM19,%YMM21 |
(391) 0x420cf7 VCVTTPD2DQ %YMM21,%XMM22 |
(391) 0x420cfd KXNORW %K0,%K0,%K1 |
(391) 0x420d01 VXORPD %XMM23,%XMM23,%XMM23 |
(391) 0x420d07 VGATHERDPD (%R11,%XMM22,8),%YMM23{%K1} |
(391) 0x420d0e KXNORW %K0,%K0,%K1 |
(391) 0x420d12 VXORPD %XMM24,%XMM24,%XMM24 |
(391) 0x420d18 VGATHERDPD 0x8(%R11,%XMM22,8),%YMM24{%K1} |
(391) 0x420d20 KXNORW %K0,%K0,%K1 |
(391) 0x420d24 VXORPD %XMM25,%XMM25,%XMM25 |
(391) 0x420d2a VGATHERDPD 0x10(%R11,%XMM22,8),%YMM25{%K1} |
(391) 0x420d32 VRNDSCALEPD $0xb,%YMM21,%YMM26 |
(391) 0x420d39 VSUBPD %YMM26,%YMM21,%YMM21 |
(391) 0x420d3f KXNORW %K0,%K0,%K1 |
(391) 0x420d43 VXORPD %XMM26,%XMM26,%XMM26 |
(391) 0x420d49 VGATHERDPD 0x18(%R11,%XMM22,8),%YMM26{%K1} |
(391) 0x420d51 VMOVAPD %YMM21,%YMM22 |
(391) 0x420d57 VFMADD213PD %YMM2,%YMM3,%YMM22 |
(391) 0x420d5d VFMADD213PD %YMM6,%YMM21,%YMM22 |
(391) 0x420d63 VFMADD213PD %YMM5,%YMM21,%YMM22 |
(391) 0x420d69 VFMADD213PD %YMM18,%YMM23,%YMM22 |
(391) 0x420d6f VMOVAPD %YMM21,%YMM18 |
(391) 0x420d75 VFMADD213PD %YMM11,%YMM17,%YMM18 |
(391) 0x420d7b VFMADD213PD %YMM13,%YMM21,%YMM18 |
(391) 0x420d81 VFMADD213PD %YMM15,%YMM21,%YMM18 |
(391) 0x420d87 VFMADD213PD %YMM22,%YMM24,%YMM18 |
(391) 0x420d8d VMOVAPD %YMM21,%YMM22 |
(391) 0x420d93 VFMADD213PD %YMM4,%YMM7,%YMM22 |
(391) 0x420d99 VFMADD213PD %YMM10,%YMM21,%YMM22 |
(391) 0x420d9f VFMADD213PD %YMM9,%YMM21,%YMM22 |
(391) 0x420da5 VFMADD213PD %YMM18,%YMM25,%YMM22 |
(391) 0x420dab VMOVAPD %YMM21,%YMM18 |
(391) 0x420db1 VFMADD213PD %YMM8,%YMM12,%YMM18 |
(391) 0x420db7 VFMADD213PD %YMM14,%YMM21,%YMM18 |
(391) 0x420dbd VFMADD213PD %YMM16,%YMM21,%YMM18 |
(391) 0x420dc3 VFMADD213PD %YMM22,%YMM26,%YMM18 |
(391) 0x420dc9 ADD $0x4,%RSI |
(391) 0x420dcd CMP %R9,%RSI |
(391) 0x420dd0 JB 420cf0 |
0x420dd6 VEXTRACTF32X4 $0x1,%YMM18,%XMM21 |
0x420ddd VADDPD %XMM21,%XMM18,%XMM18 |
0x420de3 VSHUFPD $0x1,%XMM18,%XMM18,%XMM21 |
0x420dea VADDSD %XMM21,%XMM18,%XMM18 |
0x420df0 CMP %R10,%R9 |
0x420df3 JNE 420ebb |
0x420df9 JMP 420a36 |
0x420e00 XOR %R10D,%R10D |
0x420e03 XOR %R11D,%R11D |
0x420e06 MOV %EBX,%ESI |
0x420e08 SUB %R13D,%ESI |
0x420e0b MOV 0x20(%RSP),%RAX |
0x420e10 LEA (%RAX,%R12,8),%RDI |
0x420e14 JMP 420e2c |
(392) 0x420e20 INC %R11 |
(392) 0x420e23 CMP %R11,%R15 |
(392) 0x420e26 JE 420be3 |
(392) 0x420e2c VMOVSD (%RDI,%R11,8),%XMM3 |
(392) 0x420e32 VUCOMISD %XMM3,%XMM2 |
(392) 0x420e36 JBE 420e20 |
(392) 0x420e38 CMP %R11D,%ESI |
(392) 0x420e3b JE 420e20 |
(392) 0x420e3d MOVSXD %R10D,%R10 |
(392) 0x420e40 VMOVSD %XMM3,(%RDX,%R10,8) |
(392) 0x420e46 INC %R10D |
(392) 0x420e49 JMP 420e20 |
0x420e4b VPBROADCASTQ %R10,%YMM21 |
0x420e51 VBROADCASTSD %XMM3,%YMM19 |
0x420e57 VBROADCASTSD %XMM18,%YMM3 |
0x420e5d VBROADCASTSD %XMM2,%YMM2 |
0x420e62 VBROADCASTSD %XMM6,%YMM6 |
0x420e67 VBROADCASTSD %XMM5,%YMM5 |
0x420e6c VPBROADCASTQ %R11,%YMM20 |
0x420e72 VBROADCASTSD %XMM17,%YMM17 |
0x420e78 VBROADCASTSD %XMM11,%YMM11 |
0x420e7d VBROADCASTSD %XMM13,%YMM13 |
0x420e82 VBROADCASTSD %XMM15,%YMM15 |
0x420e87 VBROADCASTSD %XMM7,%YMM7 |
0x420e8c VBROADCASTSD %XMM4,%YMM4 |
0x420e91 VBROADCASTSD %XMM10,%YMM10 |
0x420e96 VBROADCASTSD %XMM9,%YMM9 |
0x420e9b VBROADCASTSD %XMM12,%YMM12 |
0x420ea0 VBROADCASTSD %XMM8,%YMM8 |
0x420ea5 VBROADCASTSD %XMM14,%YMM14 |
0x420eaa VBROADCASTSD %XMM16,%YMM16 |
0x420eb0 VXORPD %XMM18,%XMM18,%XMM18 |
0x420eb6 XOR %R9D,%R9D |
0x420eb9 JMP 420ed1 |
0x420ebb VPBROADCASTQ %R10,%YMM21 |
0x420ec1 VMOVUPD 0x60(%RSP),%YMM24 |
0x420ec9 VMOVUPD 0x80(%RSP),%YMM25 |
0x420ed1 VPBROADCASTQ %R9,%YMM22 |
0x420ed7 VPSUBQ %YMM22,%YMM21,%YMM21 |
0x420edd VPCMPNLEUQ 0xdb5f8(%RIP),%YMM21,%K1 |
0x420ee8 VMOVUPD (%RDX,%R9,8),%YMM21{%K1}{z} |
0x420eef VMOVAPD %YMM21,%YMM31{%K1} |
0x420ef5 VMULPD %YMM31,%YMM19,%YMM19 |
0x420efb VCVTTPD2DQ %YMM19,%XMM21 |
0x420f01 VPMOVSXDQ %XMM21,%YMM21 |
0x420f07 VPSLLQ $0x3,%YMM21,%YMM21 |
0x420f0e VPADDQ %YMM21,%YMM20,%YMM20 |
0x420f14 KMOVQ %K1,%K2 |
0x420f19 VPXORD %XMM21,%XMM21,%XMM21 |
0x420f1f VGATHERQPD (,%YMM20,1),%YMM21{%K2} |
0x420f2a VRNDSCALEPD $0xb,%YMM19,%YMM22 |
0x420f31 KMOVQ %K1,%K2 |
0x420f36 VXORPD %XMM23,%XMM23,%XMM23 |
0x420f3c VGATHERQPD 0x8(,%YMM20,1),%YMM23{%K2} |
0x420f47 VSUBPD %YMM22,%YMM19,%YMM19 |
0x420f4d VMOVAPD %YMM21,%YMM30{%K1} |
0x420f53 VMOVAPD %YMM23,%YMM27{%K1} |
0x420f59 VFMADD231PD %YMM17,%YMM19,%YMM11 |
0x420f5f KMOVQ %K1,%K2 |
0x420f64 VXORPD %XMM17,%XMM17,%XMM17 |
0x420f6a VGATHERQPD 0x10(,%YMM20,1),%YMM17{%K2} |
0x420f75 VFMADD213PD %YMM13,%YMM19,%YMM11 |
0x420f7b VFMADD213PD %YMM15,%YMM19,%YMM11 |
0x420f81 VMULPD %YMM27,%YMM11,%YMM11 |
0x420f87 VMOVAPD %YMM17,%YMM25{%K1} |
0x420f8d KMOVQ %K1,%K2 |
0x420f92 VXORPD %XMM13,%XMM13,%XMM13 |
0x420f97 VGATHERQPD 0x18(,%YMM20,1),%YMM13{%K2} |
0x420fa2 VFMADD231PD %YMM12,%YMM19,%YMM8 |
0x420fa8 VFMADD213PD %YMM14,%YMM19,%YMM8 |
0x420fae VFMADD213PD %YMM16,%YMM19,%YMM8 |
0x420fb4 VMOVAPD %YMM13,%YMM24{%K1} |
0x420fba VFMADD231PD %YMM3,%YMM19,%YMM2 |
0x420fc0 VFMADD213PD %YMM6,%YMM19,%YMM2 |
0x420fc6 VFMADD213PD %YMM5,%YMM19,%YMM2 |
0x420fcc VFMADD213PD %YMM11,%YMM30,%YMM2 |
0x420fd2 VFMADD231PD %YMM7,%YMM19,%YMM4 |
0x420fd8 VFMADD213PD %YMM10,%YMM19,%YMM4 |
0x420fde VFMADD213PD %YMM9,%YMM19,%YMM4 |
0x420fe4 VFMADD213PD %YMM2,%YMM25,%YMM4 |
0x420fea VFMADD231PD %YMM8,%YMM24,%YMM4{%K1}{z} |
0x420ff0 VEXTRACTF128 $0x1,%YMM4,%XMM2 |
0x420ff6 VADDPD %XMM2,%XMM4,%XMM2 |
0x420ffa VSHUFPD $0x1,%XMM2,%XMM2,%XMM3 |
0x420fff VADDSD %XMM3,%XMM2,%XMM2 |
0x421003 VADDSD %XMM2,%XMM18,%XMM18 |
0x421009 JMP 420a36 |
/usr/lib/gcc/x86_64-redhat-linux/8/../../../../include/c++/8/bits/shared_ptr_base.h: 1308 - 1308 |
-------------------------------------------------------------------------------- |
1308: { return _M_ptr; } |
/scratch_na/users/xoserete/qaas_runs/171-284-5201/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/OhmmsVector.h: 223 - 223 |
-------------------------------------------------------------------------------- |
223: return X[i]; |
/scratch_na/users/xoserete/qaas_runs/171-284-5201/intel/miniqmc/build/miniqmc/src/QMCWaveFunctions/Jastrow/TwoBodyJastrowRef.h: 107 - 132 |
-------------------------------------------------------------------------------- |
107: for (int k = 0; k < ratios.size(); ++k) |
[...] |
127: for (int jg = 0; jg < NumGroups; ++jg) |
128: { |
129: const FuncType& f2(*F[igt + jg]); |
130: int iStart = P.first(jg); |
131: int iEnd = P.last(jg); |
132: curUat += f2.evaluateV(iat, iStart, iEnd, dist, DistCompressed.data()); |
/scratch_na/users/xoserete/qaas_runs/171-284-5201/intel/miniqmc/build/miniqmc/src/QMCWaveFunctions/Jastrow/BsplineFunctor.h: 229 - 260 |
-------------------------------------------------------------------------------- |
229: const real_type* restrict distArray = _distArray + iStart; |
[...] |
236: for (int jat = 0; jat < iLimit; jat++) |
237: { |
238: real_type r = distArray[jat]; |
239: // pick the distances smaller than the cutoff and avoid the reference atom |
240: if (r < cutoff_radius && iStart + jat != iat) |
241: distArrayCompressed[iCount++] = distArray[jat]; |
242: } |
243: |
244: real_type d = 0.0; |
245: //#pragma omp simd reduction(+:d) |
246: for (int jat = 0; jat < iCount; jat++) |
247: { |
248: real_type r = distArrayCompressed[jat]; |
249: r *= DeltaRInv; |
250: int i = (int)r; |
251: real_type t = r - real_type(i); |
252: real_type tp0 = t * t * t; |
253: real_type tp1 = t * t; |
254: real_type tp2 = t; |
255: |
256: real_type d1 = SplineCoefs[i + 0] * (A[0] * tp0 + A[1] * tp1 + A[2] * tp2 + A[3]); |
257: real_type d2 = SplineCoefs[i + 1] * (A[4] * tp0 + A[5] * tp1 + A[6] * tp2 + A[7]); |
258: real_type d3 = SplineCoefs[i + 2] * (A[8] * tp0 + A[9] * tp1 + A[10] * tp2 + A[11]); |
259: real_type d4 = SplineCoefs[i + 3] * (A[12] * tp0 + A[13] * tp1 + A[14] * tp2 + A[15]); |
260: d += (d1 + d2 + d3 + d4); |
/scratch_na/users/xoserete/qaas_runs/171-284-5201/intel/miniqmc/build/miniqmc/src/Particle/ParticleSet.h: 313 - 316 |
-------------------------------------------------------------------------------- |
313: inline int first(int igroup) const { return (*group_offsets_)[igroup]; } |
314: |
315: ///return the last index of a group i |
316: inline int last(int igroup) const { return (*group_offsets_)[igroup + 1]; } |
/usr/lib/gcc/x86_64-redhat-linux/8/../../../../include/c++/8/bits/stl_vector.h: 951 - 951 |
-------------------------------------------------------------------------------- |
951: return *(this->_M_impl._M_start + __n); |
Path / |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.00 |
CQA speedup if FP arith vectorized | 1.04 |
CQA speedup if fully vectorized | 5.00 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.41 |
Bottlenecks | P5, |
Function | miniqmcreference::TwoBodyJastrowRef |
Source | shared_ptr_base.h:1308-1308,OhmmsVector.h:223-223,TwoBodyJastrowRef.h:107-107,TwoBodyJastrowRef.h:127-129,TwoBodyJastrowRef.h:132-132,BsplineFunctor.h:229-229,BsplineFunctor.h:236-236,BsplineFunctor.h:240-240,BsplineFunctor.h:246-251,BsplineFunctor.h:256-260,ParticleSet.h:313-313,ParticleSet.h:316-316,stl_vector.h:951-951 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 45.00 |
CQA cycles if no scalar integer | 45.00 |
CQA cycles if FP arith vectorized | 43.25 |
CQA cycles if fully vectorized | 9.00 |
Front-end cycles | 31.83 |
DIV/SQRT cycles | 24.00 |
P0 cycles | 25.00 |
P1 cycles | 16.00 |
P2 cycles | 16.00 |
P3 cycles | 1.00 |
P4 cycles | 45.00 |
P5 cycles | 10.00 |
P6 cycles | 1.00 |
P7 cycles | 1.00 |
P8 cycles | 1.00 |
P9 cycles | 10.00 |
P10 cycles | 16.00 |
P11 cycles | 0.00 |
Inter-iter dependencies cycles | NA |
FE+BE cycles (UFS) | 45.71 - 89.13 |
Stall cycles (UFS) | 14.36 - 57.78 |
Nb insns | 174.00 |
Nb uops | 191.00 |
Nb loads | 37.00 |
Nb stores | 2.00 |
Nb stack references | 7.00 |
FLOP/cycle | 3.11 |
Nb FLOP add-sub | 12.00 |
Nb FLOP mul | 8.00 |
Nb FLOP fma | 60.00 |
Nb FLOP div | 0.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 12.27 |
Bytes prefetched | 0.00 |
Bytes loaded | 488.00 |
Bytes stored | 64.00 |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | 39.69 |
Vectorization ratio load | 28.57 |
Vectorization ratio store | 100.00 |
Vectorization ratio mul | 100.00 |
Vectorization ratio add_sub | 55.56 |
Vectorization ratio fma | 100.00 |
Vectorization ratio div_sqrt | NA |
Vectorization ratio other | 31.25 |
Vector-efficiency ratio all | 24.38 |
Vector-efficiency ratio load | 23.21 |
Vector-efficiency ratio store | 50.00 |
Vector-efficiency ratio mul | 50.00 |
Vector-efficiency ratio add_sub | 27.78 |
Vector-efficiency ratio fma | 50.00 |
Vector-efficiency ratio div_sqrt | NA |
Vector-efficiency ratio other | 19.92 |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.00 |
CQA speedup if FP arith vectorized | 1.04 |
CQA speedup if fully vectorized | 5.00 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.41 |
Bottlenecks | P5, |
Function | miniqmcreference::TwoBodyJastrowRef |
Source | shared_ptr_base.h:1308-1308,OhmmsVector.h:223-223,TwoBodyJastrowRef.h:107-107,TwoBodyJastrowRef.h:127-129,TwoBodyJastrowRef.h:132-132,BsplineFunctor.h:229-229,BsplineFunctor.h:236-236,BsplineFunctor.h:240-240,BsplineFunctor.h:246-251,BsplineFunctor.h:256-260,ParticleSet.h:313-313,ParticleSet.h:316-316,stl_vector.h:951-951 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 45.00 |
CQA cycles if no scalar integer | 45.00 |
CQA cycles if FP arith vectorized | 43.25 |
CQA cycles if fully vectorized | 9.00 |
Front-end cycles | 31.83 |
DIV/SQRT cycles | 24.00 |
P0 cycles | 25.00 |
P1 cycles | 16.00 |
P2 cycles | 16.00 |
P3 cycles | 1.00 |
P4 cycles | 45.00 |
P5 cycles | 10.00 |
P6 cycles | 1.00 |
P7 cycles | 1.00 |
P8 cycles | 1.00 |
P9 cycles | 10.00 |
P10 cycles | 16.00 |
P11 cycles | 0.00 |
Inter-iter dependencies cycles | NA |
FE+BE cycles (UFS) | 45.71 - 89.13 |
Stall cycles (UFS) | 14.36 - 57.78 |
Nb insns | 174.00 |
Nb uops | 191.00 |
Nb loads | 37.00 |
Nb stores | 2.00 |
Nb stack references | 7.00 |
FLOP/cycle | 3.11 |
Nb FLOP add-sub | 12.00 |
Nb FLOP mul | 8.00 |
Nb FLOP fma | 60.00 |
Nb FLOP div | 0.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 12.27 |
Bytes prefetched | 0.00 |
Bytes loaded | 488.00 |
Bytes stored | 64.00 |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | 39.69 |
Vectorization ratio load | 28.57 |
Vectorization ratio store | 100.00 |
Vectorization ratio mul | 100.00 |
Vectorization ratio add_sub | 55.56 |
Vectorization ratio fma | 100.00 |
Vectorization ratio div_sqrt | NA |
Vectorization ratio other | 31.25 |
Vector-efficiency ratio all | 24.38 |
Vector-efficiency ratio load | 23.21 |
Vector-efficiency ratio store | 50.00 |
Vector-efficiency ratio mul | 50.00 |
Vector-efficiency ratio add_sub | 27.78 |
Vector-efficiency ratio fma | 50.00 |
Vector-efficiency ratio div_sqrt | NA |
Vector-efficiency ratio other | 19.92 |
Path / |
Function | miniqmcreference::TwoBodyJastrowRef |
Source file and lines | BsplineFunctor.h:229-260 |
Module | exec |
nb instructions | 174 |
nb uops | 191 |
loop length | 948 |
used x86 registers | 15 |
used mmx registers | 0 |
used xmm registers | 20 |
used ymm registers | 27 |
used zmm registers | 0 |
nb stack references | 7 |
ADD-SUB / MUL ratio | 3.50 |
micro-operation queue | 31.83 cycles |
front end | 31.83 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 24.00 | 25.00 | 16.00 | 16.00 | 1.00 | 45.00 | 10.00 | 1.00 | 1.00 | 1.00 | 10.00 | 16.00 |
cycles | 24.00 | 25.00 | 16.00 | 16.00 | 1.00 | 45.00 | 10.00 | 1.00 | 1.00 | 1.00 | 10.00 | 16.00 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 45.71-89.13 |
Stall cycles | 14.36-57.78 |
ROB full (events) | 12.82-61.22 |
RS full (events) | 1.97-0.00 |
LM full (events) | 3.03-0.06 |
Front-end | 31.83 |
Dispatch | 45.00 |
Overall L1 | 45.00 |
all | 21% |
load | 33% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 100% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 16% |
all | 44% |
load | 28% |
store | 100% |
mul | 100% |
add-sub | 42% |
fma | 100% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 37% |
all | 39% |
load | 28% |
store | 100% |
mul | 100% |
add-sub | 55% |
fma | 100% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 31% |
all | 17% |
load | 25% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 50% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 14% |
all | 26% |
load | 23% |
store | 50% |
mul | 50% |
add-sub | 21% |
fma | 50% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 22% |
all | 24% |
load | 23% |
store | 50% |
mul | 50% |
add-sub | 27% |
fma | 50% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 19% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
VXORPD %XMM18,%XMM18,%XMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VADDSD %XMM0,%XMM18,%XMM0 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
LEA 0x1(%R8),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %RCX,%R8 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RAX,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
JE 4208cc <_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x7c> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x268(%R14),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RSI),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RSI,%R8,4),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R13,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SHR $0x20,%RSI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
SUB %R13D,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
TEST %ESI,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 420a30 <_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x1e0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x50(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%R8,%RAX,1),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x58(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%RDI,8),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x8(%R9),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %ESI,%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R15,%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOVSXD %R13D,%R12 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV $-0x10,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
AND %RAX,%R11 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
JE 420e00 <_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x5b0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RBX,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVQ %R13,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VBROADCASTSD %XMM2,%YMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV 0x20(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RAX,%R12,8),%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %R10D,%R10D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %ESI,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %R15,%R11 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RCX,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x48(%RSP),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x40(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JNE 420e06 <_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x5b6> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
TEST %R10D,%R10D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 420a30 <_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x1e0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VMOVSD 0x238(%R9),%XMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x218(%R9),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x18(%R9),%XMM18 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x20(%R9),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x28(%R9),%XMM6 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x30(%R9),%XMM5 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x38(%R9),%XMM17 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x40(%R9),%XMM11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x48(%R9),%XMM13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x50(%R9),%XMM15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x58(%R9),%XMM7 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x60(%R9),%XMM4 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x68(%R9),%XMM10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x70(%R9),%XMM9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x78(%R9),%XMM12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x80(%R9),%XMM8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x88(%R9),%XMM14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x90(%R9),%XMM16 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R10D,%R10D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R10,%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV $-0x4,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
AND %RAX,%R9 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
JE 420e4b <_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x5fb> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VMOVUPD %YMM25,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VMOVUPD %YMM24,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VBROADCASTSD %XMM3,%YMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM18,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM2,%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM6,%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM5,%YMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R11,%YMM20 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM17,%YMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM11,%YMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM13,%YMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM15,%YMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM7,%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM4,%YMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM10,%YMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM9,%YMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM12,%YMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM8,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM14,%YMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM16,%YMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VXORPD %XMM18,%XMM18,%XMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %ESI,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VEXTRACTF32X4 $0x1,%YMM18,%XMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VADDPD %XMM21,%XMM18,%XMM18 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VSHUFPD $0x1,%XMM18,%XMM18,%XMM21 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VADDSD %XMM21,%XMM18,%XMM18 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
CMP %R10,%R9 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JNE 420ebb <_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x66b> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
JMP 420a36 <_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x1e6> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
XOR %R10D,%R10D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %R11D,%R11D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %EBX,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %R13D,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x20(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RAX,%R12,8),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 420e2c <_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x5dc> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
VPBROADCASTQ %R10,%YMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM3,%YMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM18,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM2,%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM6,%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM5,%YMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R11,%YMM20 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM17,%YMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM11,%YMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM13,%YMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM15,%YMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM7,%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM4,%YMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM10,%YMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM9,%YMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM12,%YMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM8,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM14,%YMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM16,%YMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VXORPD %XMM18,%XMM18,%XMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %R9D,%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 420ed1 <_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x681> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
VPBROADCASTQ %R10,%YMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD 0x60(%RSP),%YMM24 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x80(%RSP),%YMM25 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VPBROADCASTQ %R9,%YMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPSUBQ %YMM22,%YMM21,%YMM21 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.33 |
VPCMPNLEUQ 0xdb5f8(%RIP),%YMM21,%K1 | |||||||||||||||
VMOVUPD (%RDX,%R9,8),%YMM21{%K1}{z} | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVAPD %YMM21,%YMM31{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMULPD %YMM31,%YMM19,%YMM19 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VCVTTPD2DQ %YMM19,%XMM21 | 2 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 7 | 1 |
VPMOVSXDQ %XMM21,%YMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPSLLQ $0x3,%YMM21,%YMM21 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2-4 | 0.50 |
VPADDQ %YMM21,%YMM20,%YMM20 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
KMOVQ %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
VPXORD %XMM21,%XMM21,%XMM21 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.33 |
VGATHERQPD (,%YMM20,1),%YMM21{%K2} | 5 | 1 | 1 | 1.33 | 1.33 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
VRNDSCALEPD $0xb,%YMM19,%YMM22 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 1 |
KMOVQ %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
VXORPD %XMM23,%XMM23,%XMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD 0x8(,%YMM20,1),%YMM23{%K2} | 5 | 1 | 1 | 1.33 | 1.33 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
VSUBPD %YMM22,%YMM19,%YMM19 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD %YMM21,%YMM30{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMOVAPD %YMM23,%YMM27{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VFMADD231PD %YMM17,%YMM19,%YMM11 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
KMOVQ %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
VXORPD %XMM17,%XMM17,%XMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD 0x10(,%YMM20,1),%YMM17{%K2} | 5 | 1 | 1 | 1.33 | 1.33 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
VFMADD213PD %YMM13,%YMM19,%YMM11 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD %YMM15,%YMM19,%YMM11 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM27,%YMM11,%YMM11 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD %YMM17,%YMM25{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
KMOVQ %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
VXORPD %XMM13,%XMM13,%XMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD 0x18(,%YMM20,1),%YMM13{%K2} | 5 | 1 | 1 | 1.33 | 1.33 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
VFMADD231PD %YMM12,%YMM19,%YMM8 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD %YMM14,%YMM19,%YMM8 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD %YMM16,%YMM19,%YMM8 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD %YMM13,%YMM24{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VFMADD231PD %YMM3,%YMM19,%YMM2 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD %YMM6,%YMM19,%YMM2 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD %YMM5,%YMM19,%YMM2 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD %YMM11,%YMM30,%YMM2 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM7,%YMM19,%YMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD %YMM10,%YMM19,%YMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD %YMM9,%YMM19,%YMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD %YMM2,%YMM25,%YMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM8,%YMM24,%YMM4{%K1}{z} | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VEXTRACTF128 $0x1,%YMM4,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VADDPD %XMM2,%XMM4,%XMM2 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VSHUFPD $0x1,%XMM2,%XMM2,%XMM3 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VADDSD %XMM3,%XMM2,%XMM2 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VADDSD %XMM2,%XMM18,%XMM18 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
JMP 420a36 <_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x1e6> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
Function | miniqmcreference::TwoBodyJastrowRef |
Source file and lines | BsplineFunctor.h:229-260 |
Module | exec |
nb instructions | 174 |
nb uops | 191 |
loop length | 948 |
used x86 registers | 15 |
used mmx registers | 0 |
used xmm registers | 20 |
used ymm registers | 27 |
used zmm registers | 0 |
nb stack references | 7 |
ADD-SUB / MUL ratio | 3.50 |
micro-operation queue | 31.83 cycles |
front end | 31.83 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 24.00 | 25.00 | 16.00 | 16.00 | 1.00 | 45.00 | 10.00 | 1.00 | 1.00 | 1.00 | 10.00 | 16.00 |
cycles | 24.00 | 25.00 | 16.00 | 16.00 | 1.00 | 45.00 | 10.00 | 1.00 | 1.00 | 1.00 | 10.00 | 16.00 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 45.71-89.13 |
Stall cycles | 14.36-57.78 |
ROB full (events) | 12.82-61.22 |
RS full (events) | 1.97-0.00 |
LM full (events) | 3.03-0.06 |
Front-end | 31.83 |
Dispatch | 45.00 |
Overall L1 | 45.00 |
all | 21% |
load | 33% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 100% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 16% |
all | 44% |
load | 28% |
store | 100% |
mul | 100% |
add-sub | 42% |
fma | 100% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 37% |
all | 39% |
load | 28% |
store | 100% |
mul | 100% |
add-sub | 55% |
fma | 100% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 31% |
all | 17% |
load | 25% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 50% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 14% |
all | 26% |
load | 23% |
store | 50% |
mul | 50% |
add-sub | 21% |
fma | 50% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 22% |
all | 24% |
load | 23% |
store | 50% |
mul | 50% |
add-sub | 27% |
fma | 50% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 19% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
VXORPD %XMM18,%XMM18,%XMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VADDSD %XMM0,%XMM18,%XMM0 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
LEA 0x1(%R8),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %RCX,%R8 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RAX,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
JE 4208cc <_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x7c> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x268(%R14),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RSI),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RSI,%R8,4),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R13,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SHR $0x20,%RSI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
SUB %R13D,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
TEST %ESI,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 420a30 <_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x1e0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x50(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%R8,%RAX,1),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x58(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%RDI,8),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x8(%R9),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %ESI,%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R15,%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOVSXD %R13D,%R12 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV $-0x10,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
AND %RAX,%R11 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
JE 420e00 <_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x5b0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RBX,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVQ %R13,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VBROADCASTSD %XMM2,%YMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV 0x20(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RAX,%R12,8),%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %R10D,%R10D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %ESI,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %R15,%R11 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RCX,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x48(%RSP),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x40(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JNE 420e06 <_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x5b6> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
TEST %R10D,%R10D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 420a30 <_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x1e0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VMOVSD 0x238(%R9),%XMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x218(%R9),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x18(%R9),%XMM18 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x20(%R9),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x28(%R9),%XMM6 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x30(%R9),%XMM5 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x38(%R9),%XMM17 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x40(%R9),%XMM11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x48(%R9),%XMM13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x50(%R9),%XMM15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x58(%R9),%XMM7 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x60(%R9),%XMM4 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x68(%R9),%XMM10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x70(%R9),%XMM9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x78(%R9),%XMM12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x80(%R9),%XMM8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x88(%R9),%XMM14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x90(%R9),%XMM16 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R10D,%R10D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R10,%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV $-0x4,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
AND %RAX,%R9 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
JE 420e4b <_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x5fb> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VMOVUPD %YMM25,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VMOVUPD %YMM24,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VBROADCASTSD %XMM3,%YMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM18,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM2,%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM6,%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM5,%YMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R11,%YMM20 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM17,%YMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM11,%YMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM13,%YMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM15,%YMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM7,%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM4,%YMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM10,%YMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM9,%YMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM12,%YMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM8,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM14,%YMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM16,%YMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VXORPD %XMM18,%XMM18,%XMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %ESI,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VEXTRACTF32X4 $0x1,%YMM18,%XMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VADDPD %XMM21,%XMM18,%XMM18 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VSHUFPD $0x1,%XMM18,%XMM18,%XMM21 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VADDSD %XMM21,%XMM18,%XMM18 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
CMP %R10,%R9 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JNE 420ebb <_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x66b> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
JMP 420a36 <_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x1e6> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
XOR %R10D,%R10D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %R11D,%R11D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %EBX,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %R13D,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x20(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RAX,%R12,8),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 420e2c <_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x5dc> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
VPBROADCASTQ %R10,%YMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM3,%YMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM18,%YMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM2,%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM6,%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM5,%YMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R11,%YMM20 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM17,%YMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM11,%YMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM13,%YMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM15,%YMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM7,%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM4,%YMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM10,%YMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM9,%YMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM12,%YMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM8,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM14,%YMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM16,%YMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VXORPD %XMM18,%XMM18,%XMM18 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %R9D,%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 420ed1 <_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x681> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
VPBROADCASTQ %R10,%YMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVUPD 0x60(%RSP),%YMM24 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVUPD 0x80(%RSP),%YMM25 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VPBROADCASTQ %R9,%YMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPSUBQ %YMM22,%YMM21,%YMM21 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.33 |
VPCMPNLEUQ 0xdb5f8(%RIP),%YMM21,%K1 | |||||||||||||||
VMOVUPD (%RDX,%R9,8),%YMM21{%K1}{z} | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVAPD %YMM21,%YMM31{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMULPD %YMM31,%YMM19,%YMM19 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VCVTTPD2DQ %YMM19,%XMM21 | 2 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 7 | 1 |
VPMOVSXDQ %XMM21,%YMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPSLLQ $0x3,%YMM21,%YMM21 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2-4 | 0.50 |
VPADDQ %YMM21,%YMM20,%YMM20 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
KMOVQ %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
VPXORD %XMM21,%XMM21,%XMM21 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.33 |
VGATHERQPD (,%YMM20,1),%YMM21{%K2} | 5 | 1 | 1 | 1.33 | 1.33 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
VRNDSCALEPD $0xb,%YMM19,%YMM22 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 1 |
KMOVQ %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
VXORPD %XMM23,%XMM23,%XMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD 0x8(,%YMM20,1),%YMM23{%K2} | 5 | 1 | 1 | 1.33 | 1.33 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
VSUBPD %YMM22,%YMM19,%YMM19 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVAPD %YMM21,%YMM30{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VMOVAPD %YMM23,%YMM27{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VFMADD231PD %YMM17,%YMM19,%YMM11 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
KMOVQ %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
VXORPD %XMM17,%XMM17,%XMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD 0x10(,%YMM20,1),%YMM17{%K2} | 5 | 1 | 1 | 1.33 | 1.33 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
VFMADD213PD %YMM13,%YMM19,%YMM11 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD %YMM15,%YMM19,%YMM11 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD %YMM27,%YMM11,%YMM11 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD %YMM17,%YMM25{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
KMOVQ %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
VXORPD %XMM13,%XMM13,%XMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VGATHERQPD 0x18(,%YMM20,1),%YMM13{%K2} | 5 | 1 | 1 | 1.33 | 1.33 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1.33 | 0-29 | 2 |
VFMADD231PD %YMM12,%YMM19,%YMM8 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD %YMM14,%YMM19,%YMM8 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD %YMM16,%YMM19,%YMM8 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVAPD %YMM13,%YMM24{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
VFMADD231PD %YMM3,%YMM19,%YMM2 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD %YMM6,%YMM19,%YMM2 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD %YMM5,%YMM19,%YMM2 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD %YMM11,%YMM30,%YMM2 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM7,%YMM19,%YMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD %YMM10,%YMM19,%YMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD %YMM9,%YMM19,%YMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD %YMM2,%YMM25,%YMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD %YMM8,%YMM24,%YMM4{%K1}{z} | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VEXTRACTF128 $0x1,%YMM4,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VADDPD %XMM2,%XMM4,%XMM2 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VSHUFPD $0x1,%XMM2,%XMM2,%XMM3 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VADDSD %XMM3,%XMM2,%XMM2 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VADDSD %XMM2,%XMM18,%XMM18 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
JMP 420a36 <_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE14evaluateRatiosERNS1_18VirtualParticleSetERSt6vectorIdSaIdEE+0x1e6> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |