Function: _ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6 ... | Module: libqmcwfs.so | Source: einspline_spo_ref.hpp:203-230 [...] | Coverage: 0.97% |
---|
Function: _ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6 ... | Module: libqmcwfs.so | Source: einspline_spo_ref.hpp:203-230 [...] | Coverage: 0.97% |
---|
/home/eoseret/qaas_runs_CPU_9468/171-145-9236/intel/miniqmc/build/miniqmc/src/QMCWaveFunctions/einspline_spo_ref.hpp: 203 - 230 |
-------------------------------------------------------------------------------- |
203: ScopedTimer local_timer(timer); |
204: |
205: auto u = Lattice.toUnit_floor(P.activeR(iat)); |
206: for (int i = 0; i < nBlocks; ++i) |
207: MultiBsplineEvalRef::evaluate_vgh(einsplines[i], u[0], u[1], u[2], psi[i].data(), grad[i].data(), hess[i].data(), |
208: nSplinesPerBlock); |
209: } |
210: |
211: inline void evaluate(const ParticleSet& P, |
[...] |
219: for (int i = 0; i < nBlocks; ++i) |
220: { |
221: // in real simulation, phase needs to be applied. Here just fake computation |
222: const int first = i * nBlocks; |
223: for (int j = first; j < std::min((i + 1) * nSplinesPerBlock, OrbitalSetSize); j++) |
224: { |
225: psi_v[j] = psi[i][j - first]; |
226: dpsi_v[j] = grad[i][j - first]; |
227: d2psi_v[j] = hess[i].data(0)[j - first]; |
228: } |
229: } |
230: } |
/home/eoseret/qaas_runs_CPU_9468/171-145-9236/intel/miniqmc/build/miniqmc/src/Utilities/NewTimer.h: 242 - 249 |
-------------------------------------------------------------------------------- |
242: ScopeGuard(TIMER& t) : timer(t) { timer.start(); } |
[...] |
249: ~ScopeGuard() { timer.stop(); } |
/home/eoseret/qaas_runs_CPU_9468/171-145-9236/intel/miniqmc/build/miniqmc/src/Particle/ParticleSet.h: 143 - 143 |
-------------------------------------------------------------------------------- |
143: return (active_ptcl_ == iat) ? active_pos_ : R[iat]; |
/cluster/comp/gcc/13.2.0/include/c++/13.2.0/bits/stl_vector.h: 1126 - 1258 |
-------------------------------------------------------------------------------- |
1126: return *(this->_M_impl._M_start + __n); |
[...] |
1258: { return _M_data_ptr(this->_M_impl._M_start); } |
/home/eoseret/qaas_runs_CPU_9468/171-145-9236/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/TinyVector.h: 145 - 145 |
-------------------------------------------------------------------------------- |
145: X[i] = base[i * offset]; |
/cluster/comp/gcc/13.2.0/include/c++/13.2.0/bits/stl_algobase.h: 238 - 238 |
-------------------------------------------------------------------------------- |
238: if (__b < __a) |
/home/eoseret/qaas_runs_CPU_9468/171-145-9236/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/OhmmsVector.h: 223 - 229 |
-------------------------------------------------------------------------------- |
223: return X[i]; |
[...] |
229: return X[i]; |
/home/eoseret/qaas_runs_CPU_9468/171-145-9236/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/VectorSoAContainer.h: 231 - 271 |
-------------------------------------------------------------------------------- |
231: inline const AoSElement_t operator[](size_t i) const { return AoSElement_t(myData + i, nGhosts); } |
[...] |
265: inline T* data() { return myData; } |
[...] |
271: inline T* restrict data(size_t i) { return myData + i * nGhosts; } |
0x4c380 PUSH %RBP |
0x4c381 MOV %RSP,%RBP |
0x4c384 PUSH %R15 |
0x4c386 PUSH %R14 |
0x4c388 PUSH %R13 |
0x4c38a MOVSXD %EDX,%R13 |
0x4c38d PUSH %R12 |
0x4c38f MOV %RSI,%R12 |
0x4c392 PUSH %RBX |
0x4c393 MOV %RDI,%RBX |
0x4c396 SUB $0x78,%RSP |
0x4c39a MOV 0x358(%RDI),%R15 |
0x4c3a1 MOV %RCX,-0x88(%RBP) |
0x4c3a8 MOV %R8,-0x90(%RBP) |
0x4c3af MOV %R15,%RDI |
0x4c3b2 MOV %R9,-0x98(%RBP) |
0x4c3b9 CALL 8590 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE5startEv@plt> |
0x4c3be LEA 0x48(%RBX),%RSI |
0x4c3c2 LEA 0x128(%R12),%RDX |
0x4c3ca CMP 0x124(%R12),%R13D |
0x4c3d2 JE 4c3e2 |
0x4c3d4 MOV 0x40(%R12),%RAX |
0x4c3d9 LEA (%R13,%R13,2),%RDX |
0x4c3de LEA (%RAX,%RDX,8),%RDX |
0x4c3e2 LEA -0x50(%RBP),%RDI |
0x4c3e6 CALL 13670 <_ZNK11qmcplusplus14CrystalLatticeIdLj3EE12toUnit_floorIdEENS_10TinyVectorIdLj3EEERKNS3_IT_Lj3EEE> |
0x4c3eb MOV 0x30(%RBX),%R9D |
0x4c3ef TEST %R9D,%R9D |
0x4c3f2 JLE 4c4f3 |
0x4c3f8 MOV -0x40(%RBP),%R14 |
0x4c3fc MOV -0x48(%RBP),%RCX |
0x4c400 XOR %R12D,%R12D |
0x4c403 VMOVSD -0x50(%RBP),%XMM5 |
0x4c408 VMOVQ %R14,%XMM2 |
0x4c40d VMOVQ %RCX,%XMM1 |
0x4c412 VMOVSD %XMM5,-0x58(%RBP) |
(566) 0x4c417 MOV 0x310(%RBX),%R11 |
(566) 0x4c41e LEA (%R12,%R12,2),%R10 |
(566) 0x4c422 LEA (%R12,%R12,4),%RSI |
(566) 0x4c426 MOV 0x340(%RBX),%RDI |
(566) 0x4c42d MOV 0x328(%RBX),%R8 |
(566) 0x4c434 MOV 0x2f8(%RBX),%RAX |
(566) 0x4c43b SAL $0x3,%RSI |
(566) 0x4c43f VMOVSD %XMM2,-0x68(%RBP) |
(566) 0x4c444 LEA (%R11,%R10,8),%R13 |
(566) 0x4c448 MOV 0x18(%RDI,%RSI,1),%RCX |
(566) 0x4c44d VMOVSD -0x58(%RBP),%XMM0 |
(566) 0x4c452 VMOVSD %XMM1,-0x60(%RBP) |
(566) 0x4c457 MOV 0x18(%R8,%RSI,1),%RDX |
(566) 0x4c45c MOV (%RAX,%R12,8),%RDI |
(566) 0x4c460 LEA 0x1(%R12),%R14 |
(566) 0x4c465 MOV (%R13),%RSI |
(566) 0x4c469 MOVSXD 0x40(%RBX),%R8 |
(566) 0x4c46d CALL 4bac0 <_ZN16miniqmcreference19MultiBsplineEvalRef12evaluate_vghIdEEvPKN11qmcplusplus14bspline_traitsIT_Lj3EE10SplineTypeES4_S4_S4_PS4_S9_S9_m> |
(566) 0x4c472 CMP %R14D,0x30(%RBX) |
(566) 0x4c476 VMOVSD -0x60(%RBP),%XMM1 |
(566) 0x4c47b VMOVSD -0x68(%RBP),%XMM2 |
(566) 0x4c480 JLE 4c4f3 |
(566) 0x4c482 MOV 0x328(%RBX),%RSI |
(566) 0x4c489 LEA (%R14,%R14,4),%RDX |
(566) 0x4c48d MOV 0x310(%RBX),%RDI |
(566) 0x4c494 LEA (%R14,%R14,2),%R8 |
(566) 0x4c498 SAL $0x3,%RDX |
(566) 0x4c49c MOV 0x340(%RBX),%R9 |
(566) 0x4c4a3 VMOVSD -0x58(%RBP),%XMM0 |
(566) 0x4c4a8 VMOVSD %XMM2,-0x68(%RBP) |
(566) 0x4c4ad MOV 0x2f8(%RBX),%R13 |
(566) 0x4c4b4 LEA (%RDI,%R8,8),%R11 |
(566) 0x4c4b8 MOV 0x18(%RSI,%RDX,1),%R10 |
(566) 0x4c4bd VMOVSD %XMM1,-0x60(%RBP) |
(566) 0x4c4c2 MOV 0x18(%R9,%RDX,1),%RCX |
(566) 0x4c4c7 MOV (%R11),%RSI |
(566) 0x4c4ca ADD $0x2,%R12 |
(566) 0x4c4ce MOV (%R13,%R14,8),%RDI |
(566) 0x4c4d3 MOVSXD 0x40(%RBX),%R8 |
(566) 0x4c4d7 MOV %R10,%RDX |
(566) 0x4c4da CALL 4bac0 <_ZN16miniqmcreference19MultiBsplineEvalRef12evaluate_vghIdEEvPKN11qmcplusplus14bspline_traitsIT_Lj3EE10SplineTypeES4_S4_S4_PS4_S9_S9_m> |
(566) 0x4c4df CMP %R12D,0x30(%RBX) |
(566) 0x4c4e3 VMOVSD -0x60(%RBP),%XMM1 |
(566) 0x4c4e8 VMOVSD -0x68(%RBP),%XMM2 |
(566) 0x4c4ed JG 4c417 |
0x4c4f3 MOV %R15,%RDI |
0x4c4f6 CALL 8480 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE4stopEv@plt> |
0x4c4fb MOV 0x30(%RBX),%R15D |
0x4c4ff MOV %R15D,-0x6c(%RBP) |
0x4c503 TEST %R15D,%R15D |
0x4c506 JLE 4c81a |
0x4c50c MOVSXD -0x6c(%RBP),%RCX |
0x4c510 VMOVD 0x40(%RBX),%XMM2 |
0x4c515 XOR %R15D,%R15D |
0x4c518 XOR %R14D,%R14D |
0x4c51b MOVL $0,-0x60(%RBP) |
0x4c522 VMOVD 0x8(%RBX),%XMM3 |
0x4c527 VMOVDQA %XMM2,%XMM1 |
0x4c52b LEA (%RCX,%RCX,2),%RAX |
0x4c52f LEA (,%RCX,8),%R12 |
0x4c537 MOVQ $0,-0x68(%RBP) |
0x4c53f SAL $0x3,%RAX |
0x4c543 MOV %R12,-0x78(%RBP) |
0x4c547 MOV %RAX,-0x80(%RBP) |
0x4c54b MOVQ $0,-0x58(%RBP) |
0x4c553 NOPL (%RAX,%RAX,1) |
(564) 0x4c558 VPMINSD %XMM3,%XMM1,%XMM0 |
(564) 0x4c55d VMOVD %XMM0,%EDI |
(564) 0x4c561 CMP %EDI,-0x60(%RBP) |
(564) 0x4c564 JGE 4c7eb |
(564) 0x4c56a MOV 0x310(%RBX),%RDX |
(564) 0x4c571 MOV -0x58(%RBP),%R9 |
(564) 0x4c575 XOR %EAX,%EAX |
(564) 0x4c577 MOV 0x340(%RBX),%R8 |
(564) 0x4c57e MOV -0x60(%RBP),%R13D |
(564) 0x4c582 MOV 0x328(%RBX),%R10 |
(564) 0x4c589 MOV (%RDX,%R9,1),%R12 |
(564) 0x4c58d MOV -0x90(%RBP),%R11 |
(564) 0x4c594 SUB %R13D,%EDI |
(564) 0x4c597 MOV 0x18(%R8,%R15,1),%R9 |
(564) 0x4c59c MOV -0x98(%RBP),%R13 |
(564) 0x4c5a3 ADD %R15,%R10 |
(564) 0x4c5a6 SAL $0x3,%RDI |
(564) 0x4c5aa MOV -0x68(%RBP),%RDX |
(564) 0x4c5ae MOV -0x88(%RBP),%R8 |
(564) 0x4c5b5 MOV 0x18(%R10),%RSI |
(564) 0x4c5b9 MOVSXD 0x8(%R10),%RCX |
(564) 0x4c5bd ADD 0x18(%R11),%RDX |
(564) 0x4c5c1 MOV 0x18(%R8),%R11 |
(564) 0x4c5c5 MOV 0x18(%R13),%R8 |
(564) 0x4c5c9 LEA -0x8(%RDI),%R13 |
(564) 0x4c5cd SHR $0x3,%R13 |
(564) 0x4c5d1 LEA (%RSI,%RCX,8),%R10 |
(564) 0x4c5d5 SAL $0x4,%RCX |
(564) 0x4c5d9 INC %R13 |
(564) 0x4c5dc ADD %RSI,%RCX |
(564) 0x4c5df ADD %R14,%R11 |
(564) 0x4c5e2 ADD %R14,%R8 |
(564) 0x4c5e5 AND $0x3,%R13D |
(564) 0x4c5e9 JE 4c6d8 |
(564) 0x4c5ef CMP $0x1,%R13 |
(564) 0x4c5f3 JE 4c688 |
(564) 0x4c5f9 CMP $0x2,%R13 |
(564) 0x4c5fd JE 4c641 |
(564) 0x4c5ff VMOVSD (%R12),%XMM4 |
(564) 0x4c605 ADD $0x18,%RDX |
(564) 0x4c609 VMOVSD %XMM4,(%R11) |
(564) 0x4c60e VMOVSD (%RCX),%XMM8 |
(564) 0x4c612 VMOVSD (%RSI),%XMM6 |
(564) 0x4c616 VMOVSD %XMM8,-0x40(%RBP) |
(564) 0x4c61b MOV -0x40(%RBP),%RAX |
(564) 0x4c61f VMOVHPD (%R10),%XMM6,%XMM7 |
(564) 0x4c624 VMOVDQU %XMM7,-0x18(%RDX) |
(564) 0x4c629 MOV %RAX,-0x8(%RDX) |
(564) 0x4c62d VMOVSD (%R9),%XMM9 |
(564) 0x4c632 MOV $0x8,%EAX |
(564) 0x4c637 VMOVAPD %XMM7,-0x50(%RBP) |
(564) 0x4c63c VMOVSD %XMM9,(%R8) |
(564) 0x4c641 VMOVSD (%R12,%RAX,1),%XMM10 |
(564) 0x4c647 ADD $0x18,%RDX |
(564) 0x4c64b VMOVSD %XMM10,(%R11,%RAX,1) |
(564) 0x4c651 VMOVSD (%RCX,%RAX,1),%XMM13 |
(564) 0x4c656 VMOVSD (%RSI,%RAX,1),%XMM11 |
(564) 0x4c65b VMOVSD %XMM13,-0x40(%RBP) |
(564) 0x4c660 MOV -0x40(%RBP),%R13 |
(564) 0x4c664 VMOVHPD (%R10,%RAX,1),%XMM11,%XMM12 |
(564) 0x4c66a VMOVDQU %XMM12,-0x18(%RDX) |
(564) 0x4c66f MOV %R13,-0x8(%RDX) |
(564) 0x4c673 VMOVSD (%R9,%RAX,1),%XMM14 |
(564) 0x4c679 VMOVAPD %XMM12,-0x50(%RBP) |
(564) 0x4c67e VMOVSD %XMM14,(%R8,%RAX,1) |
(564) 0x4c684 ADD $0x8,%RAX |
(564) 0x4c688 VMOVSD (%R12,%RAX,1),%XMM15 |
(564) 0x4c68e ADD $0x18,%RDX |
(564) 0x4c692 VMOVSD %XMM15,(%R11,%RAX,1) |
(564) 0x4c698 VMOVSD (%RCX,%RAX,1),%XMM4 |
(564) 0x4c69d VMOVSD (%RSI,%RAX,1),%XMM5 |
(564) 0x4c6a2 VMOVSD %XMM4,-0x40(%RBP) |
(564) 0x4c6a7 MOV -0x40(%RBP),%R13 |
(564) 0x4c6ab VMOVHPD (%R10,%RAX,1),%XMM5,%XMM0 |
(564) 0x4c6b1 VMOVDQU %XMM0,-0x18(%RDX) |
(564) 0x4c6b6 MOV %R13,-0x8(%RDX) |
(564) 0x4c6ba VMOVSD (%R9,%RAX,1),%XMM6 |
(564) 0x4c6c0 VMOVAPD %XMM0,-0x50(%RBP) |
(564) 0x4c6c5 VMOVSD %XMM6,(%R8,%RAX,1) |
(564) 0x4c6cb ADD $0x8,%RAX |
(564) 0x4c6cf CMP %RAX,%RDI |
(564) 0x4c6d2 JE 4c7eb |
(565) 0x4c6d8 VMOVSD (%R12,%RAX,1),%XMM7 |
(565) 0x4c6de ADD $0x60,%RDX |
(565) 0x4c6e2 VMOVSD %XMM7,(%R11,%RAX,1) |
(565) 0x4c6e8 VMOVSD (%RCX,%RAX,1),%XMM10 |
(565) 0x4c6ed VMOVSD (%RSI,%RAX,1),%XMM8 |
(565) 0x4c6f2 VMOVSD %XMM10,-0x40(%RBP) |
(565) 0x4c6f7 MOV -0x40(%RBP),%R13 |
(565) 0x4c6fb VMOVHPD (%R10,%RAX,1),%XMM8,%XMM9 |
(565) 0x4c701 VMOVDQU %XMM9,-0x60(%RDX) |
(565) 0x4c706 MOV %R13,-0x50(%RDX) |
(565) 0x4c70a VMOVSD (%R9,%RAX,1),%XMM11 |
(565) 0x4c710 VMOVSD %XMM11,(%R8,%RAX,1) |
(565) 0x4c716 VMOVSD 0x8(%R12,%RAX,1),%XMM12 |
(565) 0x4c71d VMOVSD %XMM12,0x8(%RAX,%R11,1) |
(565) 0x4c724 VMOVSD 0x8(%RCX,%RAX,1),%XMM15 |
(565) 0x4c72a VMOVSD 0x8(%RSI,%RAX,1),%XMM13 |
(565) 0x4c730 VMOVSD %XMM15,-0x40(%RBP) |
(565) 0x4c735 MOV -0x40(%RBP),%R13 |
(565) 0x4c739 VMOVHPD 0x8(%R10,%RAX,1),%XMM13,%XMM14 |
(565) 0x4c740 VMOVDQU %XMM14,-0x48(%RDX) |
(565) 0x4c745 MOV %R13,-0x38(%RDX) |
(565) 0x4c749 VMOVSD 0x8(%R9,%RAX,1),%XMM5 |
(565) 0x4c750 VMOVSD %XMM5,0x8(%RAX,%R8,1) |
(565) 0x4c757 VMOVSD 0x10(%R12,%RAX,1),%XMM0 |
(565) 0x4c75e VMOVSD %XMM0,0x10(%RAX,%R11,1) |
(565) 0x4c765 VMOVSD 0x10(%RCX,%RAX,1),%XMM6 |
(565) 0x4c76b VMOVSD 0x10(%RSI,%RAX,1),%XMM4 |
(565) 0x4c771 VMOVSD %XMM6,-0x40(%RBP) |
(565) 0x4c776 MOV -0x40(%RBP),%R13 |
(565) 0x4c77a VMOVHPD 0x10(%R10,%RAX,1),%XMM4,%XMM7 |
(565) 0x4c781 VMOVDQU %XMM7,-0x30(%RDX) |
(565) 0x4c786 MOV %R13,-0x20(%RDX) |
(565) 0x4c78a VMOVSD 0x10(%R9,%RAX,1),%XMM8 |
(565) 0x4c791 VMOVSD %XMM8,0x10(%RAX,%R8,1) |
(565) 0x4c798 VMOVSD 0x18(%R12,%RAX,1),%XMM9 |
(565) 0x4c79f VMOVSD %XMM9,0x18(%RAX,%R11,1) |
(565) 0x4c7a6 VMOVSD 0x18(%RCX,%RAX,1),%XMM12 |
(565) 0x4c7ac VMOVSD 0x18(%RSI,%RAX,1),%XMM10 |
(565) 0x4c7b2 VMOVSD %XMM12,-0x40(%RBP) |
(565) 0x4c7b7 MOV -0x40(%RBP),%R13 |
(565) 0x4c7bb VMOVHPD 0x18(%R10,%RAX,1),%XMM10,%XMM11 |
(565) 0x4c7c2 VMOVDQU %XMM11,-0x18(%RDX) |
(565) 0x4c7c7 MOV %R13,-0x8(%RDX) |
(565) 0x4c7cb VMOVSD 0x18(%R9,%RAX,1),%XMM13 |
(565) 0x4c7d2 ADD $0x20,%RAX |
(565) 0x4c7d6 VMOVAPD %XMM11,-0x50(%RBP) |
(565) 0x4c7db VMOVSD %XMM13,-0x8(%RAX,%R8,1) |
(565) 0x4c7e2 CMP %RAX,%RDI |
(565) 0x4c7e5 JNE 4c6d8 |
(564) 0x4c7eb ADDQ $0x18,-0x58(%RBP) |
(564) 0x4c7f0 MOV -0x6c(%RBP),%EDI |
(564) 0x4c7f3 VPADDD %XMM2,%XMM1,%XMM1 |
(564) 0x4c7f7 ADD $0x28,%R15 |
(564) 0x4c7fb MOV -0x78(%RBP),%R12 |
(564) 0x4c7ff MOV -0x80(%RBP),%RSI |
(564) 0x4c803 MOV -0x58(%RBP),%R9 |
(564) 0x4c807 ADD %EDI,-0x60(%RBP) |
(564) 0x4c80a ADD %RSI,-0x68(%RBP) |
(564) 0x4c80e ADD %R12,%R14 |
(564) 0x4c811 CMP %RSI,%R9 |
(564) 0x4c814 JNE 4c558 |
0x4c81a ADD $0x78,%RSP |
0x4c81e POP %RBX |
0x4c81f POP %R12 |
0x4c821 POP %R13 |
0x4c823 POP %R14 |
0x4c825 POP %R15 |
0x4c827 POP %RBP |
0x4c828 RET |
0x4c829 NOP |
0x4c82a NOPW (%RAX,%RAX,1) |
Path / |
Source file and lines | einspline_spo_ref.hpp:203-230 |
Module | libqmcwfs.so |
nb instructions | 67 |
nb uops | 70 |
loop length | 274 |
used x86 registers | 14 |
used mmx registers | 0 |
used xmm registers | 4 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 12 |
micro-operation queue | 11.67 cycles |
front end | 11.67 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 2.50 | 2.07 | 6.00 | 6.00 | 9.50 | 2.00 | 2.50 | 9.50 | 9.50 | 9.50 | 1.93 | 6.00 |
cycles | 2.50 | 2.07 | 6.00 | 6.00 | 9.50 | 2.00 | 2.50 | 9.50 | 9.50 | 9.50 | 1.93 | 6.00 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 11.27 |
Stall cycles | 0.00 |
Front-end | 11.67 |
Dispatch | 9.50 |
Overall L1 | 11.67 |
all | 4% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 4% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
all | 9% |
load | 8% |
store | 9% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 10% |
all | 12% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 9% |
load | 8% |
store | 10% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 10% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOVSXD %EDX,%R13 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSI,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RDI,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB $0x78,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x358(%RDI),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,-0x88(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R8,-0x90(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R15,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R9,-0x98(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 8590 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE5startEv@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
LEA 0x48(%RBX),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x128(%R12),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP 0x124(%R12),%R13D | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JE 4c3e2 <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEERNS6_INS2_10TinyVectorIdLj3EEESaISB_EEES9_+0x62> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x40(%R12),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%R13,%R13,2),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA (%RAX,%RDX,8),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x50(%RBP),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CALL 13670 <_ZNK11qmcplusplus14CrystalLatticeIdLj3EE12toUnit_floorIdEENS_10TinyVectorIdLj3EEERKNS3_IT_Lj3EEE> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV 0x30(%RBX),%R9D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
TEST %R9D,%R9D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 4c4f3 <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEERNS6_INS2_10TinyVectorIdLj3EEESaISB_EEES9_+0x173> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x40(%RBP),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x48(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
XOR %R12D,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVSD -0x50(%RBP),%XMM5 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVQ %R14,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VMOVQ %RCX,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VMOVSD %XMM5,-0x58(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R15,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 8480 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE4stopEv@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV 0x30(%RBX),%R15D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R15D,-0x6c(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
TEST %R15D,%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 4c81a <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEERNS6_INS2_10TinyVectorIdLj3EEESaISB_EEES9_+0x49a> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOVSXD -0x6c(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVD 0x40(%RBX),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
XOR %R15D,%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %R14D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOVL $0,-0x60(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVD 0x8(%RBX),%XMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VMOVDQA %XMM2,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
LEA (%RCX,%RCX,2),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA (,%RCX,8),%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOVQ $0,-0x68(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SAL $0x3,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV %R12,-0x78(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RAX,-0x80(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVQ $0,-0x58(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD $0x78,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | einspline_spo_ref.hpp:203-230 |
Module | libqmcwfs.so |
nb instructions | 67 |
nb uops | 70 |
loop length | 274 |
used x86 registers | 14 |
used mmx registers | 0 |
used xmm registers | 4 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 12 |
micro-operation queue | 11.67 cycles |
front end | 11.67 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 2.50 | 2.07 | 6.00 | 6.00 | 9.50 | 2.00 | 2.50 | 9.50 | 9.50 | 9.50 | 1.93 | 6.00 |
cycles | 2.50 | 2.07 | 6.00 | 6.00 | 9.50 | 2.00 | 2.50 | 9.50 | 9.50 | 9.50 | 1.93 | 6.00 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 11.27 |
Stall cycles | 0.00 |
Front-end | 11.67 |
Dispatch | 9.50 |
Overall L1 | 11.67 |
all | 4% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 4% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
all | 9% |
load | 8% |
store | 9% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 10% |
all | 12% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 9% |
load | 8% |
store | 10% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 10% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOVSXD %EDX,%R13 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSI,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RDI,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB $0x78,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x358(%RDI),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,-0x88(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R8,-0x90(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R15,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R9,-0x98(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 8590 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE5startEv@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
LEA 0x48(%RBX),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x128(%R12),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP 0x124(%R12),%R13D | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JE 4c3e2 <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEERNS6_INS2_10TinyVectorIdLj3EEESaISB_EEES9_+0x62> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x40(%R12),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%R13,%R13,2),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA (%RAX,%RDX,8),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x50(%RBP),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CALL 13670 <_ZNK11qmcplusplus14CrystalLatticeIdLj3EE12toUnit_floorIdEENS_10TinyVectorIdLj3EEERKNS3_IT_Lj3EEE> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV 0x30(%RBX),%R9D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
TEST %R9D,%R9D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 4c4f3 <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEERNS6_INS2_10TinyVectorIdLj3EEESaISB_EEES9_+0x173> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x40(%RBP),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x48(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
XOR %R12D,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVSD -0x50(%RBP),%XMM5 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVQ %R14,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VMOVQ %RCX,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VMOVSD %XMM5,-0x58(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R15,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 8480 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE4stopEv@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV 0x30(%RBX),%R15D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R15D,-0x6c(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
TEST %R15D,%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 4c81a <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEERNS6_INS2_10TinyVectorIdLj3EEESaISB_EEES9_+0x49a> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOVSXD -0x6c(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVD 0x40(%RBX),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
XOR %R15D,%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %R14D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOVL $0,-0x60(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVD 0x8(%RBX),%XMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VMOVDQA %XMM2,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
LEA (%RCX,%RCX,2),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA (,%RCX,8),%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOVQ $0,-0x68(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SAL $0x3,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV %R12,-0x78(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RAX,-0x80(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVQ $0,-0x58(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD $0x78,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEERNS6_INS2_10TinyVectorIdLj3EEESaISB_EEES9_– | 0.97 | 1.08 |
▼Loop 564 - einspline_spo_ref.hpp:219-227 - libqmcwfs.so– | 0 | 0.01 |
○Loop 565 - einspline_spo_ref.hpp:223-227 - libqmcwfs.so | 0.96 | 1.02 |
○Loop 566 - einspline_spo_ref.hpp:206-207 - libqmcwfs.so | 0 | 0.01 |