Function: _ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE11evaluateLo ... | Module: libqmcwfs.so | Source: DiracDeterminantRef.cpp:231-256 [...] | Coverage: 0.12% |
---|
Function: _ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE11evaluateLo ... | Module: libqmcwfs.so | Source: DiracDeterminantRef.cpp:231-256 [...] | Coverage: 0.12% |
---|
/home/eoseret/qaas_runs_CPU_9468/171-145-9236/intel/miniqmc/build/miniqmc/src/Platforms/CPU/SIMD/inner_product.hpp: 82 - 155 |
-------------------------------------------------------------------------------- |
82: for (int i = 0; i < n; i++) |
83: res += a[i] * b[i]; |
[...] |
155: for (int i = 0; i < n; i++) |
/home/eoseret/qaas_runs_CPU_9468/171-145-9236/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/OhmmsMatrix.h: 217 - 217 |
-------------------------------------------------------------------------------- |
217: inline Type_t* operator[](size_type i) { return X.data() + i * D2; } |
/home/eoseret/qaas_runs_CPU_9468/171-145-9236/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/TinyVectorOps.h: 131 - 131 |
-------------------------------------------------------------------------------- |
131: res += lhs[d] * rhs[d]; |
/home/eoseret/qaas_runs_CPU_9468/171-145-9236/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/TinyVector.h: 62 - 62 |
-------------------------------------------------------------------------------- |
62: X[d] = T(0); |
/home/eoseret/qaas_runs_CPU_9468/171-145-9236/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/OhmmsVector.h: 223 - 248 |
-------------------------------------------------------------------------------- |
223: return X[i]; |
[...] |
248: inline pointer data() { return X; } |
/home/eoseret/qaas_runs_CPU_9468/171-145-9236/intel/miniqmc/build/miniqmc/src/QMCWaveFunctions/DiracDeterminantRef.cpp: 231 - 256 |
-------------------------------------------------------------------------------- |
231: typename DiracDeterminantRef<DU_TYPE>::RealType DiracDeterminantRef<DU_TYPE>::evaluateLog( |
232: ParticleSet& P, |
233: ParticleSet::ParticleGradient& G, |
234: ParticleSet::ParticleLaplacian& L) |
235: { |
236: recompute(P); |
237: |
238: if (NumPtcls == 1) |
239: { |
240: ValueType y = psiM(0, 0); |
241: GradType rv = y * dpsiM(0, 0); |
242: G[FirstIndex] += rv; |
243: L[FirstIndex] += y * d2psiM(0, 0) - dot(rv, rv); |
244: } |
245: else |
246: { |
247: for (int i = 0, iat = FirstIndex; i < NumPtcls; i++, iat++) |
248: { |
249: mGradType rv = simd::dot(psiM[i], dpsiM[i], NumOrbitals); |
250: mValueType lap = simd::dot(psiM[i], d2psiM[i], NumOrbitals); |
251: G[iat] += rv; |
252: L[iat] += lap - dot(rv, rv); |
253: } |
254: } |
255: return LogValue; |
256: } |
/home/eoseret/qaas_runs_CPU_9468/171-145-9236/intel/miniqmc/build/miniqmc/src/Numerics/PETE/OperatorTags.h: 63 - 94 |
-------------------------------------------------------------------------------- |
63: return (a * b); |
[...] |
92: inline typename BinaryReturn<T1, T2, OpAddAssign>::Type_t operator()(const T1& a, const T2& b) const |
93: { |
94: (const_cast<T1&>(a) += b); |
0x5a4e0 PUSH %RBP |
0x5a4e1 MOV %RSP,%RBP |
0x5a4e4 PUSH %R15 |
0x5a4e6 PUSH %R14 |
0x5a4e8 PUSH %R13 |
0x5a4ea MOV %RDX,%R13 |
0x5a4ed PUSH %R12 |
0x5a4ef MOV %RCX,%R12 |
0x5a4f2 PUSH %RBX |
0x5a4f3 MOV %RDI,%RBX |
0x5a4f6 SUB $0x48,%RSP |
0x5a4fa CALL 5a3b0 <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE9recomputeERNS1_11ParticleSetE> |
0x5a4ff MOV 0x484(%RBX),%EDX |
0x5a505 CMP $0x1,%EDX |
0x5a508 JE 5a8a3 |
0x5a50e MOVSXD 0x478(%RBX),%RAX |
0x5a515 TEST %EDX,%EDX |
0x5a517 JLE 5a876 |
0x5a51d MOV 0x18(%R13),%RCX |
0x5a521 LEA (%RAX,%RAX,2),%RSI |
0x5a525 MOV 0x18(%R12),%R14 |
0x5a52a MOVSXD %EDX,%R9 |
0x5a52d MOVSXD 0x480(%RBX),%R11 |
0x5a534 MOV %R9,-0x60(%RBP) |
0x5a538 VXORPD %XMM2,%XMM2,%XMM2 |
0x5a53c XOR %R9D,%R9D |
0x5a53f MOV 0x140(%RBX),%RDI |
0x5a546 LEA (%RCX,%RSI,8),%RSI |
0x5a54a LEA (%R14,%RAX,8),%R12 |
0x5a54e MOV 0xd8(%RBX),%R8 |
0x5a555 MOV 0x100(%RBX),%RCX |
0x5a55c MOV %R11,%R10 |
0x5a55f MOV 0x118(%RBX),%R15 |
0x5a566 SAL $0x3,%R11 |
0x5a56a MOV %RDI,-0x68(%RBP) |
0x5a56e LEA (,%R8,8),%R13 |
0x5a576 XOR %EDI,%EDI |
0x5a578 LEA (%RCX,%R11,1),%R8 |
0x5a57c NOPL (%RAX) |
(660) 0x5a580 MOVQ $0,-0x40(%RBP) |
(660) 0x5a588 VMOVAPD %XMM2,-0x50(%RBP) |
(660) 0x5a58d TEST %R10D,%R10D |
(660) 0x5a590 JLE 5a899 |
(660) 0x5a596 MOV %R8,%R14 |
(660) 0x5a599 MOV -0x68(%RBP),%RDX |
(660) 0x5a59d LEA (%R9,%R9,2),%RAX |
(660) 0x5a5a1 VXORPD %XMM0,%XMM0,%XMM0 |
(660) 0x5a5a5 SUB %RCX,%R14 |
(660) 0x5a5a8 SUB $0x8,%R14 |
(660) 0x5a5ac LEA (%RDX,%RAX,8),%RAX |
(660) 0x5a5b0 MOV %RCX,%RDX |
(660) 0x5a5b3 SHR $0x3,%R14 |
(660) 0x5a5b7 AND $0x3,%R14D |
(660) 0x5a5bb JE 5a890 |
(660) 0x5a5c1 VMOVSD (%RCX),%XMM3 |
(660) 0x5a5c5 VMOVAPD -0x50(%RBP),%XMM5 |
(660) 0x5a5ca LEA 0x8(%RCX),%RDX |
(660) 0x5a5ce ADD $0x18,%RAX |
(660) 0x5a5d2 VFMADD231SD -0x8(%RAX),%XMM3,%XMM0 |
(660) 0x5a5d8 VMOVDDUP %XMM3,%XMM4 |
(660) 0x5a5dc VFMADD132PD -0x18(%RAX),%XMM5,%XMM4 |
(660) 0x5a5e2 VMOVAPD %XMM4,-0x50(%RBP) |
(660) 0x5a5e7 CMP $0x1,%R14 |
(660) 0x5a5eb JE 5a890 |
(660) 0x5a5f1 CMP $0x2,%R14 |
(660) 0x5a5f5 JE 5a61d |
(660) 0x5a5f7 VMOVSD (%RDX),%XMM7 |
(660) 0x5a5fb VMOVAPD -0x50(%RBP),%XMM9 |
(660) 0x5a600 LEA 0x10(%RCX),%RDX |
(660) 0x5a604 ADD $0x18,%RAX |
(660) 0x5a608 VFMADD231SD -0x8(%RAX),%XMM7,%XMM0 |
(660) 0x5a60e VMOVDDUP %XMM7,%XMM8 |
(660) 0x5a612 VFMADD132PD -0x18(%RAX),%XMM9,%XMM8 |
(660) 0x5a618 VMOVAPD %XMM8,-0x50(%RBP) |
(660) 0x5a61d VMOVSD (%RDX),%XMM10 |
(660) 0x5a621 MOV %R10D,-0x54(%RBP) |
(660) 0x5a625 ADD $0x8,%RDX |
(660) 0x5a629 ADD $0x18,%RAX |
(660) 0x5a62d VMOVAPD -0x50(%RBP),%XMM12 |
(660) 0x5a632 VFMADD231SD -0x8(%RAX),%XMM10,%XMM0 |
(660) 0x5a638 VMOVDDUP %XMM10,%XMM11 |
(660) 0x5a63d VFMADD132PD -0x18(%RAX),%XMM12,%XMM11 |
(660) 0x5a643 VMOVAPD %XMM11,-0x50(%RBP) |
(660) 0x5a648 JMP 5a6a2 |
0x5a64a NOPW (%RAX,%RAX,1) |
(661) 0x5a650 VMOVSD 0x8(%RDX),%XMM12 |
(661) 0x5a655 VMOVSD 0x10(%RDX),%XMM5 |
(661) 0x5a65a ADD $0x60,%RAX |
(661) 0x5a65e ADD $0x20,%RDX |
(661) 0x5a662 VFMADD231SD 0x10(%R14),%XMM12,%XMM0 |
(661) 0x5a668 VMOVAPD -0x50(%RBP),%XMM4 |
(661) 0x5a66d VMOVDDUP %XMM5,%XMM7 |
(661) 0x5a671 VFMADD132SD -0x20(%RAX),%XMM0,%XMM5 |
(661) 0x5a677 VMOVDDUP %XMM12,%XMM3 |
(661) 0x5a67c VMOVSD -0x8(%RDX),%XMM0 |
(661) 0x5a681 VFMADD132PD -0x48(%RAX),%XMM4,%XMM3 |
(661) 0x5a687 VFMADD132PD -0x30(%RAX),%XMM3,%XMM7 |
(661) 0x5a68d VMOVDDUP %XMM0,%XMM8 |
(661) 0x5a691 VFMADD132PD -0x18(%RAX),%XMM7,%XMM8 |
(661) 0x5a697 VFMADD132SD -0x8(%RAX),%XMM5,%XMM0 |
(661) 0x5a69d VMOVAPD %XMM8,-0x50(%RBP) |
(661) 0x5a6a2 VMOVSD (%RDX),%XMM13 |
(661) 0x5a6a6 LEA 0x8(%RDX),%R10 |
(661) 0x5a6aa VMOVSD (%RAX),%XMM14 |
(661) 0x5a6ae LEA 0x18(%RAX),%R14 |
(661) 0x5a6b2 VMOVAPD -0x50(%RBP),%XMM6 |
(661) 0x5a6b7 VFMADD231SD 0x10(%RAX),%XMM13,%XMM0 |
(661) 0x5a6bd VMOVDDUP %XMM13,%XMM15 |
(661) 0x5a6c2 VFMADD132PD (%RAX),%XMM6,%XMM15 |
(661) 0x5a6c7 VMOVSD -0x50(%RBP),%XMM6 |
(661) 0x5a6cc VMOVAPD %XMM15,-0x50(%RBP) |
(661) 0x5a6d1 CMP %R10,%R8 |
(661) 0x5a6d4 JNE 5a650 |
(660) 0x5a6da MOV 0x158(%RBX),%RDX |
(660) 0x5a6e1 LEA -0x8(%R11),%R14 |
(660) 0x5a6e5 MOV -0x54(%RBP),%R10D |
(660) 0x5a6e9 VXORPD %XMM1,%XMM1,%XMM1 |
(660) 0x5a6ed MOV 0x180(%RBX),%RAX |
(660) 0x5a6f4 SHR $0x3,%R14 |
(660) 0x5a6f8 VMOVSD %XMM0,-0x40(%RBP) |
(660) 0x5a6fd IMUL %RDI,%RDX |
(660) 0x5a701 INC %R14 |
(660) 0x5a704 LEA (%RAX,%RDX,8),%RDX |
(660) 0x5a708 XOR %EAX,%EAX |
(660) 0x5a70a AND $0x7,%R14D |
(660) 0x5a70e JE 5a7a5 |
(660) 0x5a714 CMP $0x1,%R14 |
(660) 0x5a718 JE 5a791 |
(660) 0x5a71a CMP $0x2,%R14 |
(660) 0x5a71e JE 5a782 |
(660) 0x5a720 CMP $0x3,%R14 |
(660) 0x5a724 JE 5a773 |
(660) 0x5a726 CMP $0x4,%R14 |
(660) 0x5a72a JE 5a764 |
(660) 0x5a72c CMP $0x5,%R14 |
(660) 0x5a730 JE 5a755 |
(660) 0x5a732 CMP $0x6,%R14 |
(660) 0x5a736 JE 5a746 |
(660) 0x5a738 VMOVSD (%RCX),%XMM3 |
(660) 0x5a73c VFMADD231SD (%RDX),%XMM3,%XMM1 |
(660) 0x5a741 MOV $0x8,%EAX |
(660) 0x5a746 VMOVSD (%RCX,%RAX,1),%XMM4 |
(660) 0x5a74b VFMADD231SD (%RDX,%RAX,1),%XMM4,%XMM1 |
(660) 0x5a751 ADD $0x8,%RAX |
(660) 0x5a755 VMOVSD (%RCX,%RAX,1),%XMM5 |
(660) 0x5a75a VFMADD231SD (%RDX,%RAX,1),%XMM5,%XMM1 |
(660) 0x5a760 ADD $0x8,%RAX |
(660) 0x5a764 VMOVSD (%RCX,%RAX,1),%XMM7 |
(660) 0x5a769 VFMADD231SD (%RDX,%RAX,1),%XMM7,%XMM1 |
(660) 0x5a76f ADD $0x8,%RAX |
(660) 0x5a773 VMOVSD (%RCX,%RAX,1),%XMM8 |
(660) 0x5a778 VFMADD231SD (%RDX,%RAX,1),%XMM8,%XMM1 |
(660) 0x5a77e ADD $0x8,%RAX |
(660) 0x5a782 VMOVSD (%RCX,%RAX,1),%XMM9 |
(660) 0x5a787 VFMADD231SD (%RDX,%RAX,1),%XMM9,%XMM1 |
(660) 0x5a78d ADD $0x8,%RAX |
(660) 0x5a791 VMOVSD (%RCX,%RAX,1),%XMM10 |
(660) 0x5a796 VFMADD231SD (%RDX,%RAX,1),%XMM10,%XMM1 |
(660) 0x5a79c ADD $0x8,%RAX |
(660) 0x5a7a0 CMP %RAX,%R11 |
(660) 0x5a7a3 JE 5a814 |
(662) 0x5a7a5 VMOVSD (%RCX,%RAX,1),%XMM11 |
(662) 0x5a7aa VFMADD231SD (%RDX,%RAX,1),%XMM11,%XMM1 |
(662) 0x5a7b0 VMOVSD 0x8(%RCX,%RAX,1),%XMM12 |
(662) 0x5a7b6 VFMADD231SD 0x8(%RDX,%RAX,1),%XMM12,%XMM1 |
(662) 0x5a7bd VMOVSD 0x10(%RCX,%RAX,1),%XMM3 |
(662) 0x5a7c3 VFMADD231SD 0x10(%RDX,%RAX,1),%XMM3,%XMM1 |
(662) 0x5a7ca VMOVSD 0x18(%RCX,%RAX,1),%XMM4 |
(662) 0x5a7d0 VFMADD231SD 0x18(%RDX,%RAX,1),%XMM4,%XMM1 |
(662) 0x5a7d7 VMOVSD 0x20(%RCX,%RAX,1),%XMM5 |
(662) 0x5a7dd VFMADD231SD 0x20(%RDX,%RAX,1),%XMM5,%XMM1 |
(662) 0x5a7e4 VMOVSD 0x28(%RCX,%RAX,1),%XMM7 |
(662) 0x5a7ea VFMADD231SD 0x28(%RDX,%RAX,1),%XMM7,%XMM1 |
(662) 0x5a7f1 VMOVSD 0x30(%RCX,%RAX,1),%XMM8 |
(662) 0x5a7f7 VFMADD231SD 0x30(%RDX,%RAX,1),%XMM8,%XMM1 |
(662) 0x5a7fe VMOVSD 0x38(%RCX,%RAX,1),%XMM9 |
(662) 0x5a804 VFMADD231SD 0x38(%RDX,%RAX,1),%XMM9,%XMM1 |
(662) 0x5a80b ADD $0x40,%RAX |
(662) 0x5a80f CMP %RAX,%R11 |
(662) 0x5a812 JNE 5a7a5 |
(660) 0x5a814 VMULSD %XMM0,%XMM0,%XMM0 |
(660) 0x5a818 VFMADD231SD %XMM14,%XMM13,%XMM6 |
(660) 0x5a81d VUNPCKHPD %XMM15,%XMM15,%XMM15 |
(660) 0x5a822 VFMADD132SD %XMM15,%XMM0,%XMM15 |
(660) 0x5a827 VFMADD132SD %XMM6,%XMM15,%XMM6 |
(660) 0x5a82c VMOVUPD (%RSI),%XMM13 |
(660) 0x5a830 VMOVSD 0x10(%RSI),%XMM10 |
(660) 0x5a835 ADD %R13,%RCX |
(660) 0x5a838 ADD $0x18,%RSI |
(660) 0x5a83c VADDPD -0x50(%RBP),%XMM13,%XMM14 |
(660) 0x5a841 VADDSD -0x40(%RBP),%XMM10,%XMM11 |
(660) 0x5a846 ADD %R15,%R9 |
(660) 0x5a849 ADD %R13,%R8 |
(660) 0x5a84c MOV -0x60(%RBP),%RDX |
(660) 0x5a850 VMOVUPD %XMM14,-0x18(%RSI) |
(660) 0x5a855 VMOVSD %XMM11,-0x8(%RSI) |
(660) 0x5a85a VADDSD (%R12,%RDI,8),%XMM1,%XMM1 |
(660) 0x5a860 VSUBSD %XMM6,%XMM1,%XMM6 |
(660) 0x5a864 VMOVSD %XMM6,(%R12,%RDI,8) |
(660) 0x5a86a INC %RDI |
(660) 0x5a86d CMP %RDX,%RDI |
(660) 0x5a870 JNE 5a580 |
0x5a876 VMOVSD 0x10(%RBX),%XMM0 |
0x5a87b ADD $0x48,%RSP |
0x5a87f POP %RBX |
0x5a880 POP %R12 |
0x5a882 POP %R13 |
0x5a884 POP %R14 |
0x5a886 POP %R15 |
0x5a888 POP %RBP |
0x5a889 RET |
0x5a88a NOPW (%RAX,%RAX,1) |
(660) 0x5a890 MOV %R10D,-0x54(%RBP) |
(660) 0x5a894 JMP 5a6a2 |
(660) 0x5a899 VXORPD %XMM6,%XMM6,%XMM6 |
(660) 0x5a89d VMOVSD %XMM6,%XMM6,%XMM1 |
(660) 0x5a8a1 JMP 5a82c |
0x5a8a3 MOV 0x100(%RBX),%R11 |
0x5a8aa MOV 0x140(%RBX),%R15 |
0x5a8b1 MOVSXD 0x478(%RBX),%RSI |
0x5a8b8 MOV 0x18(%R13),%R13 |
0x5a8bc VMOVSD (%R11),%XMM2 |
0x5a8c1 VMULSD 0x10(%R15),%XMM2,%XMM0 |
0x5a8c7 LEA (%RSI,%RSI,2),%RCX |
0x5a8cb MOV 0x18(%R12),%R12 |
0x5a8d0 MOV 0x180(%RBX),%R9 |
0x5a8d7 VMOVDDUP %XMM2,%XMM9 |
0x5a8db LEA (%R13,%RCX,8),%R8 |
0x5a8e0 VMULPD (%R15),%XMM9,%XMM15 |
0x5a8e5 VADDSD 0x10(%R8),%XMM0,%XMM11 |
0x5a8eb LEA (%R12,%RSI,8),%RDI |
0x5a8ef VADDPD (%R8),%XMM15,%XMM10 |
0x5a8f4 VMOVSD %XMM11,0x10(%R8) |
0x5a8fa VMOVUPD %XMM10,(%R8) |
0x5a8ff VFNMADD213SD (%RDI),%XMM0,%XMM0 |
0x5a904 VUNPCKHPD %XMM15,%XMM15,%XMM13 |
0x5a909 VMOVSD %XMM15,%XMM15,%XMM14 |
0x5a90e VMULSD %XMM13,%XMM13,%XMM1 |
0x5a913 VFMADD132SD %XMM15,%XMM1,%XMM14 |
0x5a918 VSUBSD %XMM14,%XMM0,%XMM6 |
0x5a91d VFMADD132SD (%R9),%XMM6,%XMM2 |
0x5a922 VMOVSD %XMM2,(%RDI) |
0x5a926 JMP 5a876 |
0x5a92b NOP |
0x5a92c NOPL (%RAX) |
Path / |
Source file and lines | DiracDeterminantRef.cpp:231-256 |
Module | libqmcwfs.so |
nb instructions | 78 |
nb uops | 79 |
loop length | 333 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 10 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 2 |
ADD-SUB / MUL ratio | 1.00 |
micro-operation queue | 13.17 cycles |
front end | 13.17 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 4.83 | 4.50 | 10.00 | 10.00 | 6.00 | 4.67 | 2.40 | 6.00 | 6.00 | 6.00 | 2.60 | 10.00 |
cycles | 4.83 | 4.50 | 10.00 | 10.00 | 6.00 | 4.67 | 2.40 | 6.00 | 6.00 | 6.00 | 2.60 | 10.00 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 13.36-13.40 |
Stall cycles | 0.00 |
Front-end | 13.17 |
Dispatch | 10.00 |
Overall L1 | 13.17 |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 22% |
load | 25% |
store | 33% |
mul | 33% |
add-sub | 33% |
fma | 0% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 25% |
all | 16% |
load | 22% |
store | 20% |
mul | 33% |
add-sub | 33% |
fma | 0% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 14% |
all | 9% |
load | 6% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 8% |
all | 15% |
load | 15% |
store | 16% |
mul | 16% |
add-sub | 16% |
fma | 12% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 15% |
all | 13% |
load | 14% |
store | 15% |
mul | 16% |
add-sub | 16% |
fma | 12% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RDX,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RCX,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RDI,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB $0x48,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 5a3b0 <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE9recomputeERNS1_11ParticleSetE> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV 0x484(%RBX),%EDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP $0x1,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 5a8a3 <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE11evaluateLogERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEE+0x3c3> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOVSXD 0x478(%RBX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
TEST %EDX,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 5a876 <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE11evaluateLogERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEE+0x396> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x18(%R13),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RAX,%RAX,2),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x18(%R12),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVSXD %EDX,%R9 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOVSXD 0x480(%RBX),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R9,-0x60(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VXORPD %XMM2,%XMM2,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %R9D,%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x140(%RBX),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RCX,%RSI,8),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA (%R14,%RAX,8),%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0xd8(%RBX),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x100(%RBX),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R11,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x118(%RBX),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SAL $0x3,%R11 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV %RDI,-0x68(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (,%R8,8),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %EDI,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA (%RCX,%R11,1),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVSD 0x10(%RBX),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD $0x48,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x100(%RBX),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x140(%RBX),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVSXD 0x478(%RBX),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%R13),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD (%R11),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMULSD 0x10(%R15),%XMM2,%XMM0 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
LEA (%RSI,%RSI,2),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x18(%R12),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x180(%RBX),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVDDUP %XMM2,%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
LEA (%R13,%RCX,8),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMULPD (%R15),%XMM9,%XMM15 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VADDSD 0x10(%R8),%XMM0,%XMM11 | 1 | 0 | 0.50 | 0.33 | 0.33 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.50 |
LEA (%R12,%RSI,8),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VADDPD (%R8),%XMM15,%XMM10 | 1 | 0 | 0.50 | 0.33 | 0.33 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.50 |
VMOVSD %XMM11,0x10(%R8) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVUPD %XMM10,(%R8) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VFNMADD213SD (%RDI),%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VUNPCKHPD %XMM15,%XMM15,%XMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VMOVSD %XMM15,%XMM15,%XMM14 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VMULSD %XMM13,%XMM13,%XMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132SD %XMM15,%XMM1,%XMM14 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VSUBSD %XMM14,%XMM0,%XMM6 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VFMADD132SD (%R9),%XMM6,%XMM2 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMOVSD %XMM2,(%RDI) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP 5a876 <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE11evaluateLogERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEE+0x396> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | DiracDeterminantRef.cpp:231-256 |
Module | libqmcwfs.so |
nb instructions | 78 |
nb uops | 79 |
loop length | 333 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 10 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 2 |
ADD-SUB / MUL ratio | 1.00 |
micro-operation queue | 13.17 cycles |
front end | 13.17 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 4.83 | 4.50 | 10.00 | 10.00 | 6.00 | 4.67 | 2.40 | 6.00 | 6.00 | 6.00 | 2.60 | 10.00 |
cycles | 4.83 | 4.50 | 10.00 | 10.00 | 6.00 | 4.67 | 2.40 | 6.00 | 6.00 | 6.00 | 2.60 | 10.00 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 13.36-13.40 |
Stall cycles | 0.00 |
Front-end | 13.17 |
Dispatch | 10.00 |
Overall L1 | 13.17 |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 22% |
load | 25% |
store | 33% |
mul | 33% |
add-sub | 33% |
fma | 0% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 25% |
all | 16% |
load | 22% |
store | 20% |
mul | 33% |
add-sub | 33% |
fma | 0% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 14% |
all | 9% |
load | 6% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 8% |
all | 15% |
load | 15% |
store | 16% |
mul | 16% |
add-sub | 16% |
fma | 12% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 15% |
all | 13% |
load | 14% |
store | 15% |
mul | 16% |
add-sub | 16% |
fma | 12% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RDX,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RCX,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RDI,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB $0x48,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 5a3b0 <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE9recomputeERNS1_11ParticleSetE> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV 0x484(%RBX),%EDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP $0x1,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 5a8a3 <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE11evaluateLogERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEE+0x3c3> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOVSXD 0x478(%RBX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
TEST %EDX,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 5a876 <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE11evaluateLogERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEE+0x396> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x18(%R13),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RAX,%RAX,2),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x18(%R12),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVSXD %EDX,%R9 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOVSXD 0x480(%RBX),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R9,-0x60(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VXORPD %XMM2,%XMM2,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %R9D,%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x140(%RBX),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RCX,%RSI,8),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA (%R14,%RAX,8),%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0xd8(%RBX),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x100(%RBX),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R11,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x118(%RBX),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SAL $0x3,%R11 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV %RDI,-0x68(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (,%R8,8),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %EDI,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA (%RCX,%R11,1),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVSD 0x10(%RBX),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD $0x48,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x100(%RBX),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x140(%RBX),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVSXD 0x478(%RBX),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%R13),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD (%R11),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMULSD 0x10(%R15),%XMM2,%XMM0 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
LEA (%RSI,%RSI,2),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x18(%R12),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x180(%RBX),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVDDUP %XMM2,%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
LEA (%R13,%RCX,8),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMULPD (%R15),%XMM9,%XMM15 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VADDSD 0x10(%R8),%XMM0,%XMM11 | 1 | 0 | 0.50 | 0.33 | 0.33 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.50 |
LEA (%R12,%RSI,8),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VADDPD (%R8),%XMM15,%XMM10 | 1 | 0 | 0.50 | 0.33 | 0.33 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.50 |
VMOVSD %XMM11,0x10(%R8) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVUPD %XMM10,(%R8) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VFNMADD213SD (%RDI),%XMM0,%XMM0 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VUNPCKHPD %XMM15,%XMM15,%XMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VMOVSD %XMM15,%XMM15,%XMM14 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VMULSD %XMM13,%XMM13,%XMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132SD %XMM15,%XMM1,%XMM14 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VSUBSD %XMM14,%XMM0,%XMM6 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VFMADD132SD (%R9),%XMM6,%XMM2 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMOVSD %XMM2,(%RDI) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP 5a876 <_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE11evaluateLogERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEE+0x396> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼_ZN16miniqmcreference19DiracDeterminantRefIN11qmcplusplus13DelayedUpdateIddEEE11evaluateLogERNS1_11ParticleSetERNS1_14ParticleAttribINS1_10TinyVectorIdLj3EEESaIS9_EEERNS7_IdSaIdEEE– | 0.12 | 0.13 |
▼Loop 660 - DiracDeterminantRef.cpp:247-256 - libqmcwfs.so– | 0 | 0 |
○Loop 661 - inner_product.hpp:155-155 - libqmcwfs.so | 0.09 | 0.09 |
○Loop 662 - inner_product.hpp:82-83 - libqmcwfs.so | 0.04 | 0.04 |