Function: _ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6 ... | Module: exec | Source: einspline_spo_ref.hpp:172-189 [...] | Coverage: 23.91% |
---|
Function: _ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6 ... | Module: exec | Source: einspline_spo_ref.hpp:172-189 [...] | Coverage: 23.91% |
---|
/home/eoseret/qaas_runs_CPU_9468/171-145-9236/intel/miniqmc/build/miniqmc/src/QMCWaveFunctions/einspline_spo_ref.hpp: 172 - 189 |
-------------------------------------------------------------------------------- |
172: ScopedTimer local_timer(timer); |
173: |
174: auto u = Lattice.toUnit_floor(P.activeR(iat)); |
175: for (int i = 0; i < nBlocks; ++i) |
176: MultiBsplineEvalRef::evaluate_v(einsplines[i], u[0], u[1], u[2], psi[i].data(), nSplinesPerBlock); |
177: } |
178: |
179: inline void evaluate(const ParticleSet& P, int iat, ValueVector_t& psi_v) |
180: { |
181: evaluate_v(P, iat); |
182: |
183: for (int i = 0; i < nBlocks; ++i) |
184: { |
185: // in real simulation, phase needs to be applied. Here just fake computation |
186: const int first = i * nBlocks; |
187: std::copy_n(psi[i].data(), std::min((i + 1) * nSplinesPerBlock, OrbitalSetSize) - first, psi_v.data() + first); |
188: } |
189: } |
/home/eoseret/qaas_runs_CPU_9468/171-145-9236/intel/miniqmc/build/miniqmc/src/Utilities/NewTimer.h: 242 - 249 |
-------------------------------------------------------------------------------- |
242: ScopeGuard(TIMER& t) : timer(t) { timer.start(); } |
[...] |
249: ~ScopeGuard() { timer.stop(); } |
/home/eoseret/qaas_runs_CPU_9468/171-145-9236/intel/miniqmc/build/miniqmc/src/Numerics/Spline2/MultiBsplineEvalHelper.hpp: 45 - 47 |
-------------------------------------------------------------------------------- |
45: T sf = std::floor(x); |
46: T dx2 = x - sf; |
47: int ind2 = std::min(std::max(0, static_cast<int>(sf)), nmax); |
/home/eoseret/qaas_runs_CPU_9468/171-145-9236/intel/miniqmc/build/miniqmc/src/Particle/ParticleSet.h: 143 - 143 |
-------------------------------------------------------------------------------- |
143: return (active_ptcl_ == iat) ? active_pos_ : R[iat]; |
/home/eoseret/qaas_runs_CPU_9468/171-145-9236/intel/miniqmc/build/miniqmc/src/Numerics/Spline2/MultiBsplineData.hpp: 54 - 57 |
-------------------------------------------------------------------------------- |
54: a[0] = ((A00 * tx + A01) * tx + A02) * tx + A03; |
55: a[1] = ((A10 * tx + A11) * tx + A12) * tx + A13; |
56: a[2] = ((A20 * tx + A21) * tx + A22) * tx + A23; |
57: a[3] = ((A30 * tx + A31) * tx + A32) * tx + A33; |
/usr/lib/gcc/x86_64-redhat-linux/11/../../../../include/c++/11/bits/stl_vector.h: 1046 - 1169 |
-------------------------------------------------------------------------------- |
1046: return *(this->_M_impl._M_start + __n); |
[...] |
1169: { return _M_data_ptr(this->_M_impl._M_start); } |
/usr/lib/gcc/x86_64-redhat-linux/11/../../../../include/c++/11/bits/stl_algobase.h: 235 - 924 |
-------------------------------------------------------------------------------- |
235: if (__b < __a) |
[...] |
429: const ptrdiff_t _Num = __last - __first; |
430: if (_Num) |
431: __builtin_memmove(__result, __first, sizeof(_Tp) * _Num); |
[...] |
923: for (; __first != __last; ++__first) |
924: *__first = __tmp; |
/home/eoseret/qaas_runs_CPU_9468/171-145-9236/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/TinyVector.h: 61 - 61 |
-------------------------------------------------------------------------------- |
61: for (size_t d = 0; d < D; ++d) |
/home/eoseret/qaas_runs_CPU_9468/171-145-9236/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/TinyVectorTensorOps.h: 150 - 152 |
-------------------------------------------------------------------------------- |
150: return TinyVector<Type_t, 3>(lhs[0] * rhs[0] + lhs[1] * rhs[3] + lhs[2] * rhs[6], |
151: lhs[0] * rhs[1] + lhs[1] * rhs[4] + lhs[2] * rhs[7], |
152: lhs[0] * rhs[2] + lhs[1] * rhs[5] + lhs[2] * rhs[8]); |
/home/eoseret/qaas_runs_CPU_9468/171-145-9236/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/Tensor.h: 213 - 213 |
-------------------------------------------------------------------------------- |
213: inline Type_t operator[](unsigned int i) const { return X[i]; } |
/usr/lib/gcc/x86_64-redhat-linux/11/../../../../include/c++/11/bits/stl_algo.h: 725 - 751 |
-------------------------------------------------------------------------------- |
725: { return std::copy(__first, __first + __n, __result); } |
[...] |
751: if (__n2 <= 0) |
/home/eoseret/qaas_runs_CPU_9468/171-145-9236/intel/miniqmc/build/miniqmc/src/Particle/Lattice/CrystalLattice.h: 170 - 170 |
-------------------------------------------------------------------------------- |
170: if (-std::numeric_limits<T1>::epsilon() < val_dot[i] && val_dot[i] < 0) |
/home/eoseret/qaas_runs_CPU_9468/171-145-9236/intel/miniqmc/build/miniqmc/src/Numerics/Spline2/MultiBsplineRef.hpp: 42 - 71 |
-------------------------------------------------------------------------------- |
42: x -= spline_m->x_grid.start; |
43: y -= spline_m->y_grid.start; |
44: z -= spline_m->z_grid.start; |
45: T tx, ty, tz; |
46: int ix, iy, iz; |
47: spline2::getSplineBound(x * spline_m->x_grid.delta_inv, tx, ix, spline_m->x_grid.num - 1); |
48: spline2::getSplineBound(y * spline_m->y_grid.delta_inv, ty, iy, spline_m->y_grid.num - 1); |
49: spline2::getSplineBound(z * spline_m->z_grid.delta_inv, tz, iz, spline_m->z_grid.num - 1); |
[...] |
56: const intptr_t xs = spline_m->x_stride; |
57: const intptr_t ys = spline_m->y_stride; |
58: const intptr_t zs = spline_m->z_stride; |
59: |
60: constexpr T zero(0); |
61: std::fill(vals, vals + num_splines, zero); |
62: |
63: for (size_t i = 0; i < 4; i++) |
64: for (size_t j = 0; j < 4; j++) |
65: { |
66: const T pre00 = a[i] * b[j]; |
67: const T* restrict coefs = spline_m->coefs + (ix + i) * xs + (iy + j) * ys + iz * zs; |
68: for (size_t n = 0; n < num_splines; n++) |
69: vals[n] += pre00 * |
70: (c[0] * coefs[n] + c[1] * coefs[n + zs] + c[2] * coefs[n + 2 * zs] + |
71: c[3] * coefs[n + 3 * zs]); |
0x43c210 PUSH %RBP |
0x43c211 MOV %RSP,%RBP |
0x43c214 PUSH %R15 |
0x43c216 PUSH %R14 |
0x43c218 PUSH %R13 |
0x43c21a PUSH %R12 |
0x43c21c PUSH %RBX |
0x43c21d SUB $0x148,%RSP |
0x43c224 MOV %RCX,%RBX |
0x43c227 MOV %EDX,%R12D |
0x43c22a MOV %RSI,%R13 |
0x43c22d MOV %RDI,%R14 |
0x43c230 MOV 0x358(%RDI),%RDI |
0x43c237 MOV %RDI,-0xc0(%RBP) |
0x43c23e CALL 48ab60 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE5startEv> |
0x43c243 MOVSXD %R12D,%RAX |
0x43c246 LEA (%RAX,%RAX,2),%RCX |
0x43c24a SAL $0x3,%RCX |
0x43c24e ADD 0x40(%R13),%RCX |
0x43c252 CMP %EAX,0x124(%R13) |
0x43c259 LEA 0x128(%R13),%RAX |
0x43c260 CMOVNE %RCX,%RAX |
0x43c264 VMOVSD 0xf0(%R14),%XMM0 |
0x43c26d VMOVUPD (%RAX),%XMM1 |
0x43c271 VMULSD 0xd8(%R14),%XMM1,%XMM3 |
0x43c27a VFMADD231SD 0x8(%RAX),%XMM0,%XMM3 |
0x43c280 VMOVDDUP 0x10(%RAX),%XMM2 |
0x43c285 VFMADD231SD 0x108(%R14),%XMM2,%XMM3 |
0x43c28e VMOVUPD 0xe0(%R14),%XMM0 |
0x43c297 VMOVSD 0xf8(%R14),%XMM4 |
0x43c2a0 VSHUFPD $0x1,%XMM1,%XMM1,%XMM5 |
0x43c2a5 VUNPCKLPD 0xe8(%R14),%XMM4,%XMM4 |
0x43c2ae VMOVHPD 0x100(%R14),%XMM0,%XMM0 |
0x43c2b7 VMULPD %XMM5,%XMM4,%XMM4 |
0x43c2bb VFMADD213PD %XMM4,%XMM1,%XMM0 |
0x43c2c0 VFMADD231PD 0x110(%R14),%XMM2,%XMM0 |
0x43c2c9 VXORPD %XMM1,%XMM1,%XMM1 |
0x43c2cd VCMPPD $0x1,%XMM1,%XMM3,%XMM4 |
0x43c2d2 VMOVSD 0xbbdae(%RIP),%XMM2 |
0x43c2da VCMPPD $0x1,%XMM3,%XMM2,%XMM5 |
0x43c2df VANDPD %XMM4,%XMM5,%XMM4 |
0x43c2e3 VMOVD %XMM4,%EAX |
0x43c2e7 VXORPD %XMM4,%XMM4,%XMM4 |
0x43c2eb VMOVUPD %XMM4,-0x110(%RBP) |
0x43c2f3 TEST $0x1,%AL |
0x43c2f5 JNE 43c309 |
0x43c2f7 VROUNDSD $0x9,%XMM3,%XMM3,%XMM4 |
0x43c2fd VSUBSD %XMM4,%XMM3,%XMM3 |
0x43c301 VMOVUPD %XMM3,-0x110(%RBP) |
0x43c309 VXORPD %XMM3,%XMM3,%XMM3 |
0x43c30d VCMPPD $0x1,%XMM3,%XMM0,%XMM3 |
0x43c312 VMOVMSKPD %XMM3,%EAX |
0x43c316 VROUNDSD $0x9,%XMM0,%XMM0,%XMM3 |
0x43c31c VSUBSD %XMM3,%XMM0,%XMM3 |
0x43c320 VXORPD %XMM4,%XMM4,%XMM4 |
0x43c324 TEST $0x1,%AL |
0x43c326 JNE 43c32c |
0x43c328 VMOVAPD %XMM3,%XMM4 |
0x43c32c MOV %RBX,-0xb8(%RBP) |
0x43c333 VSHUFPD $0x1,%XMM0,%XMM0,%XMM5 |
0x43c338 VROUNDSD $0x9,%XMM5,%XMM5,%XMM6 |
0x43c33e VSUBSD %XMM6,%XMM5,%XMM6 |
0x43c342 TEST $0x2,%AL |
0x43c344 JNE 43c34a |
0x43c346 VMOVAPD %XMM6,%XMM1 |
0x43c34a MOV %R14,-0x80(%RBP) |
0x43c34e MOV 0x30(%R14),%EAX |
0x43c352 MOV %RAX,-0x88(%RBP) |
0x43c359 TEST %EAX,%EAX |
0x43c35b JLE 43c8c9 |
0x43c361 VCMPSD $0x1,%XMM0,%XMM2,%XMM0 |
0x43c366 VBLENDVPD %XMM0,%XMM4,%XMM3,%XMM0 |
0x43c36c VMOVUPD %XMM0,-0x130(%RBP) |
0x43c374 VCMPSD $0x1,%XMM5,%XMM2,%XMM0 |
0x43c379 VBLENDVPD %XMM0,%XMM1,%XMM6,%XMM0 |
0x43c37f VMOVUPD %XMM0,-0x120(%RBP) |
0x43c387 MOV -0x80(%RBP),%RAX |
0x43c38b MOV 0x2f8(%RAX),%RCX |
0x43c392 MOV %RCX,-0xd0(%RBP) |
0x43c399 MOV 0x310(%RAX),%RCX |
0x43c3a0 MOV %RCX,-0xc8(%RBP) |
0x43c3a7 MOVSXD 0x40(%RAX),%R14 |
0x43c3ab LEA (,%R14,8),%RDX |
0x43c3b3 CMP $0x1,%R14 |
0x43c3b7 MOV %R14,%R15 |
0x43c3ba ADC $0,%R15 |
0x43c3be DECQ -0x88(%RBP) |
0x43c3c5 MOV %R15,%RAX |
0x43c3c8 SHR $0x1,%RAX |
0x43c3cb MOV %RAX,-0xf8(%RBP) |
0x43c3d2 MOV %R15,%RBX |
0x43c3d5 AND $-0x2,%RBX |
0x43c3d9 XOR %ECX,%ECX |
0x43c3db VMOVSD 0xbbcad(%RIP),%XMM10 |
0x43c3e3 VMOVSD 0xb8c2d(%RIP),%XMM11 |
0x43c3eb MOV %RDX,-0x90(%RBP) |
0x43c3f2 JMP 43c41f |
0x43c3f4 NOPW %CS:(%RAX,%RAX,1) |
(855) 0x43c400 MOV -0xf0(%RBP),%RCX |
(855) 0x43c407 CMP -0x88(%RBP),%RCX |
(855) 0x43c40e LEA 0x1(%RCX),%RCX |
(855) 0x43c412 MOV -0x90(%RBP),%RDX |
(855) 0x43c419 JE 43c8c9 |
(855) 0x43c41f MOV -0xd0(%RBP),%RAX |
(855) 0x43c426 MOV (%RAX,%RCX,8),%R12 |
(855) 0x43c42a MOV %RCX,-0xf0(%RBP) |
(855) 0x43c431 LEA (%RCX,%RCX,2),%RAX |
(855) 0x43c435 MOV -0xc8(%RBP),%RCX |
(855) 0x43c43c MOV (%RCX,%RAX,8),%R13 |
(855) 0x43c440 VMOVUPD -0x110(%RBP),%XMM0 |
(855) 0x43c448 VSUBSD 0x28(%R12),%XMM0,%XMM0 |
(855) 0x43c44f VMOVUPD -0x130(%RBP),%XMM1 |
(855) 0x43c457 VSUBSD 0x50(%R12),%XMM1,%XMM1 |
(855) 0x43c45e VMOVUPD -0x120(%RBP),%XMM2 |
(855) 0x43c466 VSUBSD 0x78(%R12),%XMM2,%XMM2 |
(855) 0x43c46d VMULSD 0x48(%R12),%XMM0,%XMM0 |
(855) 0x43c474 MOVSXD 0x38(%R12),%R9 |
(855) 0x43c479 VROUNDSD $0x9,%XMM0,%XMM0,%XMM14 |
(855) 0x43c47f VMULSD 0x70(%R12),%XMM1,%XMM1 |
(855) 0x43c486 VSUBSD %XMM14,%XMM0,%XMM0 |
(855) 0x43c48b MOVSXD 0x60(%R12),%RAX |
(855) 0x43c490 MOV %RAX,-0x70(%RBP) |
(855) 0x43c494 VMULSD %XMM0,%XMM10,%XMM3 |
(855) 0x43c498 VSUBSD %XMM3,%XMM11,%XMM4 |
(855) 0x43c49c VMULSD %XMM0,%XMM0,%XMM5 |
(855) 0x43c4a0 VMOVDDUP %XMM0,%XMM6 |
(855) 0x43c4a4 VMOVDDUP 0xb8b6c(%RIP),%XMM8 |
(855) 0x43c4ac VBLENDPD $0x1,%XMM4,%XMM8,%XMM4 |
(855) 0x43c4b2 VMULPD %XMM4,%XMM6,%XMM4 |
(855) 0x43c4b6 VMOVUPD 0xbbb52(%RIP),%XMM9 |
(855) 0x43c4be VADDPD %XMM4,%XMM9,%XMM7 |
(855) 0x43c4c2 VUNPCKLPD %XMM5,%XMM6,%XMM6 |
(855) 0x43c4c6 VMOVUPD 0xbbb52(%RIP),%XMM12 |
(855) 0x43c4ce VFMADD213PD %XMM12,%XMM7,%XMM6 |
(855) 0x43c4d3 VROUNDSD $0x9,%XMM1,%XMM1,%XMM7 |
(855) 0x43c4d9 VSUBSD %XMM7,%XMM1,%XMM1 |
(855) 0x43c4dd VMULSD 0x98(%R12),%XMM2,%XMM2 |
(855) 0x43c4e7 MOVSXD 0x88(%R12),%RAX |
(855) 0x43c4ef MOV %RAX,-0x68(%RBP) |
(855) 0x43c4f3 VMOVUPD %XMM6,-0x170(%RBP) |
(855) 0x43c4fb VSHUFPD $0x1,%XMM4,%XMM4,%XMM4 |
(855) 0x43c500 VSUBSD %XMM4,%XMM11,%XMM4 |
(855) 0x43c504 VFMADD213SD %XMM11,%XMM0,%XMM4 |
(855) 0x43c509 VFMADD213SD %XMM10,%XMM0,%XMM4 |
(855) 0x43c50e VMOVSD %XMM4,-0x160(%RBP) |
(855) 0x43c516 VMULSD %XMM5,%XMM3,%XMM0 |
(855) 0x43c51a VMOVSD %XMM0,-0x158(%RBP) |
(855) 0x43c522 VMULSD %XMM1,%XMM10,%XMM0 |
(855) 0x43c526 VSUBSD %XMM0,%XMM11,%XMM3 |
(855) 0x43c52a VMULSD %XMM1,%XMM1,%XMM4 |
(855) 0x43c52e VMOVDDUP %XMM1,%XMM5 |
(855) 0x43c532 VBLENDPD $0x1,%XMM3,%XMM8,%XMM3 |
(855) 0x43c538 VMULPD %XMM3,%XMM5,%XMM3 |
(855) 0x43c53c VADDPD %XMM3,%XMM9,%XMM6 |
(855) 0x43c540 VUNPCKLPD %XMM4,%XMM5,%XMM5 |
(855) 0x43c544 VFMADD213PD %XMM12,%XMM6,%XMM5 |
(855) 0x43c549 VMOVUPD %XMM5,-0x150(%RBP) |
(855) 0x43c551 VSHUFPD $0x1,%XMM3,%XMM3,%XMM3 |
(855) 0x43c556 VSUBSD %XMM3,%XMM11,%XMM3 |
(855) 0x43c55a VFMADD213SD %XMM11,%XMM1,%XMM3 |
(855) 0x43c55f VFMADD213SD %XMM10,%XMM1,%XMM3 |
(855) 0x43c564 VMOVSD %XMM3,-0x140(%RBP) |
(855) 0x43c56c VROUNDSD $0x9,%XMM2,%XMM2,%XMM5 |
(855) 0x43c572 VMULSD %XMM4,%XMM0,%XMM0 |
(855) 0x43c576 VMOVSD %XMM0,-0x138(%RBP) |
(855) 0x43c57e VSUBSD %XMM5,%XMM2,%XMM2 |
(855) 0x43c582 VMULSD %XMM2,%XMM10,%XMM1 |
(855) 0x43c586 VSUBSD %XMM1,%XMM11,%XMM12 |
(855) 0x43c58a VFMADD213SD 0xb8a9d(%RIP),%XMM2,%XMM12 |
(855) 0x43c593 VMULSD %XMM2,%XMM11,%XMM0 |
(855) 0x43c597 VADDSD 0xbbaf9(%RIP),%XMM0,%XMM13 |
(855) 0x43c59f VMULSD %XMM2,%XMM2,%XMM3 |
(855) 0x43c5a3 VFMADD213SD 0xbbaf4(%RIP),%XMM3,%XMM13 |
(855) 0x43c5ac MOV 0x10(%R12),%RAX |
(855) 0x43c5b1 MOV %RAX,-0x38(%RBP) |
(855) 0x43c5b5 MOV 0x18(%R12),%RAX |
(855) 0x43c5ba MOV %RAX,-0x78(%RBP) |
(855) 0x43c5be MOV 0x20(%R12),%RAX |
(855) 0x43c5c3 MOV %RAX,-0x60(%RBP) |
(855) 0x43c5c7 TEST %RDX,%RDX |
(855) 0x43c5ca JE 43c671 |
(855) 0x43c5d0 MOV %R13,%RDI |
(855) 0x43c5d3 XOR %ESI,%ESI |
(855) 0x43c5d5 MOV -0x90(%RBP),%RDX |
(855) 0x43c5dc VMOVUPD %XMM12,-0xb0(%RBP) |
(855) 0x43c5e4 VMOVUPD %XMM13,-0xa0(%RBP) |
(855) 0x43c5ec MOV %R9,-0x58(%RBP) |
(855) 0x43c5f0 VMOVSD %XMM14,-0x50(%RBP) |
(855) 0x43c5f5 VMOVSD %XMM7,-0x48(%RBP) |
(855) 0x43c5fa VMOVSD %XMM1,-0x40(%RBP) |
(855) 0x43c5ff VMOVSD %XMM2,-0x30(%RBP) |
(855) 0x43c604 VMOVSD %XMM3,-0xe8(%RBP) |
(855) 0x43c60c VMOVSD %XMM5,-0xe0(%RBP) |
(855) 0x43c614 VMOVSD %XMM0,-0xd8(%RBP) |
(855) 0x43c61c CALL 4e5850 <_intel_fast_memset> |
(855) 0x43c621 VMOVSD -0xd8(%RBP),%XMM0 |
(855) 0x43c629 VMOVSD -0xe0(%RBP),%XMM5 |
(855) 0x43c631 VMOVSD -0xe8(%RBP),%XMM3 |
(855) 0x43c639 VMOVSD -0x30(%RBP),%XMM2 |
(855) 0x43c63e VMOVSD -0x40(%RBP),%XMM1 |
(855) 0x43c643 VMOVSD -0x48(%RBP),%XMM7 |
(855) 0x43c648 VMOVSD -0x50(%RBP),%XMM14 |
(855) 0x43c64d MOV -0x58(%RBP),%R9 |
(855) 0x43c651 VMOVUPD -0xa0(%RBP),%XMM13 |
(855) 0x43c659 VMOVUPD -0xb0(%RBP),%XMM12 |
(855) 0x43c661 VMOVSD 0xb89af(%RIP),%XMM11 |
(855) 0x43c669 VMOVSD 0xbba1f(%RIP),%XMM10 |
(855) 0x43c671 VCVTTSD2SI %XMM14,%EDX |
(855) 0x43c676 VCVTTSD2SI %XMM7,%ESI |
(855) 0x43c67a VCVTTSD2SI %XMM5,%ECX |
(855) 0x43c67e VFMADD213SD %XMM10,%XMM2,%XMM12 |
(855) 0x43c683 VSUBSD %XMM0,%XMM11,%XMM0 |
(855) 0x43c687 VFMADD213SD %XMM11,%XMM2,%XMM0 |
(855) 0x43c68c VFMADD213SD %XMM10,%XMM2,%XMM0 |
(855) 0x43c691 MOV 0x8(%R12),%RAX |
(855) 0x43c696 VMULSD %XMM3,%XMM1,%XMM1 |
(855) 0x43c69a VMOVDDUP %XMM13,%XMM2 |
(855) 0x43c69f VMOVDDUP %XMM12,%XMM3 |
(855) 0x43c6a4 VMOVDDUP %XMM0,%XMM4 |
(855) 0x43c6a8 VMOVDDUP %XMM1,%XMM5 |
(855) 0x43c6ac MOV -0x68(%RBP),%R11 |
(855) 0x43c6b0 DEC %R11 |
(855) 0x43c6b3 MOV %ECX,%EDI |
(855) 0x43c6b5 SAR $0x1f,%EDI |
(855) 0x43c6b8 ANDN %ECX,%EDI,%ECX |
(855) 0x43c6bd CMP %RCX,%R11 |
(855) 0x43c6c0 CMOVGE %RCX,%R11 |
(855) 0x43c6c4 MOV -0x60(%RBP),%RCX |
(855) 0x43c6c8 MOV %RCX,%R8 |
(855) 0x43c6cb IMUL %R11,%R8 |
(855) 0x43c6cf MOV -0x70(%RBP),%R12 |
(855) 0x43c6d3 DEC %R12 |
(855) 0x43c6d6 MOV %ESI,%EDI |
(855) 0x43c6d8 SAR $0x1f,%EDI |
(855) 0x43c6db ANDN %ESI,%EDI,%ESI |
(855) 0x43c6e0 CMP %RSI,%R12 |
(855) 0x43c6e3 CMOVGE %RSI,%R12 |
(855) 0x43c6e7 IMUL -0x78(%RBP),%R12 |
(855) 0x43c6ec DEC %R9 |
(855) 0x43c6ef MOV %EDX,%ESI |
(855) 0x43c6f1 SAR $0x1f,%ESI |
(855) 0x43c6f4 ANDN %EDX,%ESI,%EDX |
(855) 0x43c6f9 CMP %RDX,%R9 |
(855) 0x43c6fc CMOVGE %RDX,%R9 |
(855) 0x43c700 IMUL -0x38(%RBP),%R9 |
(855) 0x43c705 LEA 0x2(%R11),%RDX |
(855) 0x43c709 IMUL %RCX,%RDX |
(855) 0x43c70d LEA 0x3(%R11),%RSI |
(855) 0x43c711 IMUL %RCX,%RSI |
(855) 0x43c715 LEA (%RBX,%R9,1),%RDI |
(855) 0x43c719 ADD %R12,%RDI |
(855) 0x43c71c LEA (%RCX,%R8,1),%R10 |
(855) 0x43c720 ADD %RDI,%R10 |
(855) 0x43c723 MOV %R10,-0x70(%RBP) |
(855) 0x43c727 LEA (%RDI,%R8,1),%R10 |
(855) 0x43c72b MOV %R10,-0x68(%RBP) |
(855) 0x43c72f LEA (%RDI,%RDX,1),%R10 |
(855) 0x43c733 MOV %R10,-0xb0(%RBP) |
(855) 0x43c73a ADD %RSI,%RDI |
(855) 0x43c73d MOV %RDI,-0xa0(%RBP) |
(855) 0x43c744 ADD %R12,%R9 |
(855) 0x43c747 ADD %R9,%RSI |
(855) 0x43c74a LEA (%RAX,%RSI,8),%RSI |
(855) 0x43c74e MOV -0x38(%RBP),%RDI |
(855) 0x43c752 LEA (,%RDI,8),%RDI |
(855) 0x43c75a MOV %RDI,-0x30(%RBP) |
(855) 0x43c75e INC %R11 |
(855) 0x43c761 IMUL %RCX,%R11 |
(855) 0x43c765 MOV -0x78(%RBP),%RCX |
(855) 0x43c769 LEA (,%RCX,8),%R10 |
(855) 0x43c771 ADD %R9,%RDX |
(855) 0x43c774 LEA (%RAX,%RDX,8),%RDX |
(855) 0x43c778 ADD %R9,%R11 |
(855) 0x43c77b LEA (%RAX,%R11,8),%RDI |
(855) 0x43c77f ADD %R8,%R9 |
(855) 0x43c782 LEA (%RAX,%R9,8),%R12 |
(855) 0x43c786 MOV %RDI,%R9 |
(855) 0x43c789 XOR %ECX,%ECX |
(855) 0x43c78b JMP 43c7c2 |
0x43c78d NOPL (%RAX) |
(856) 0x43c790 MOV -0x60(%RBP),%RSI |
(856) 0x43c794 MOV -0x30(%RBP),%RCX |
(856) 0x43c798 ADD %RCX,%RSI |
(856) 0x43c79b MOV -0x58(%RBP),%RDX |
(856) 0x43c79f ADD %RCX,%RDX |
(856) 0x43c7a2 MOV -0x50(%RBP),%R9 |
(856) 0x43c7a6 ADD %RCX,%R9 |
(856) 0x43c7a9 MOV -0x48(%RBP),%R12 |
(856) 0x43c7ad ADD %RCX,%R12 |
(856) 0x43c7b0 MOV -0x40(%RBP),%RCX |
(856) 0x43c7b4 CMP $0x3,%RCX |
(856) 0x43c7b8 LEA 0x1(%RCX),%RCX |
(856) 0x43c7bc JE 43c400 |
(856) 0x43c7c2 VMOVSD -0x170(%RBP,%RCX,8),%XMM6 |
(856) 0x43c7cb MOV -0x38(%RBP),%R11 |
(856) 0x43c7cf MOV %RCX,-0x40(%RBP) |
(856) 0x43c7d3 IMUL %RCX,%R11 |
(856) 0x43c7d7 MOV %R12,-0x48(%RBP) |
(856) 0x43c7db MOV %R9,-0x50(%RBP) |
(856) 0x43c7df MOV %RDX,-0x58(%RBP) |
(856) 0x43c7e3 MOV %RDX,%RCX |
(856) 0x43c7e6 MOV %RSI,-0x60(%RBP) |
(856) 0x43c7ea MOV %RSI,%RDX |
(856) 0x43c7ed XOR %ESI,%ESI |
(856) 0x43c7ef JMP 43c81a |
0x43c7f1 NOPW %CS:(%RAX,%RAX,1) |
(857) 0x43c800 ADD %R10,%RDX |
(857) 0x43c803 ADD %R10,%RCX |
(857) 0x43c806 ADD %R10,%R9 |
(857) 0x43c809 ADD %R10,%R12 |
(857) 0x43c80c CMP $0x3,%RSI |
(857) 0x43c810 LEA 0x1(%RSI),%RSI |
(857) 0x43c814 JE 43c790 |
(857) 0x43c81a TEST %R14D,%R14D |
(857) 0x43c81d JE 43c800 |
(857) 0x43c81f VMULSD -0x150(%RBP,%RSI,8),%XMM6,%XMM7 |
(857) 0x43c828 CMP $0x2,%R15 |
(857) 0x43c82c JB 43c86f |
(857) 0x43c82e VMOVDDUP %XMM7,%XMM8 |
(857) 0x43c832 MOV -0xf8(%RBP),%RDI |
(857) 0x43c839 XOR %R8D,%R8D |
(857) 0x43c83c NOPL (%RAX) |
(858) 0x43c840 VMULPD (%R12,%R8,1),%XMM3,%XMM9 |
(858) 0x43c846 VFMADD231PD (%R9,%R8,1),%XMM2,%XMM9 |
(858) 0x43c84c VFMADD231PD (%RCX,%R8,1),%XMM4,%XMM9 |
(858) 0x43c852 VFMADD231PD (%RDX,%R8,1),%XMM5,%XMM9 |
(858) 0x43c858 VFMADD213PD (%R13,%R8,1),%XMM8,%XMM9 |
(858) 0x43c85f VMOVUPD %XMM9,(%R13,%R8,1) |
(858) 0x43c866 ADD $0x10,%R8 |
(858) 0x43c86a DEC %RDI |
(858) 0x43c86d JNE 43c840 |
(857) 0x43c86f CMP %R15,%RBX |
(857) 0x43c872 JAE 43c800 |
(857) 0x43c874 MOV -0x78(%RBP),%RDI |
(857) 0x43c878 IMUL %RSI,%RDI |
(857) 0x43c87c ADD %R11,%RDI |
(857) 0x43c87f MOV -0x68(%RBP),%R8 |
(857) 0x43c883 ADD %RDI,%R8 |
(857) 0x43c886 VMULSD (%RAX,%R8,8),%XMM12,%XMM8 |
(857) 0x43c88c MOV -0x70(%RBP),%R8 |
(857) 0x43c890 ADD %RDI,%R8 |
(857) 0x43c893 VFMADD231SD (%RAX,%R8,8),%XMM13,%XMM8 |
(857) 0x43c899 MOV -0xb0(%RBP),%R8 |
(857) 0x43c8a0 ADD %RDI,%R8 |
(857) 0x43c8a3 VFMADD231SD (%RAX,%R8,8),%XMM0,%XMM8 |
(857) 0x43c8a9 ADD -0xa0(%RBP),%RDI |
(857) 0x43c8b0 VFMADD231SD (%RAX,%RDI,8),%XMM1,%XMM8 |
(857) 0x43c8b6 VFMADD213SD (%R13,%RBX,8),%XMM7,%XMM8 |
(857) 0x43c8bd VMOVSD %XMM8,(%R13,%RBX,8) |
(857) 0x43c8c4 JMP 43c800 |
0x43c8c9 MOV -0xc0(%RBP),%RDI |
0x43c8d0 CALL 48ad60 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE4stopEv> |
0x43c8d5 MOV -0x80(%RBP),%R12 |
0x43c8d9 MOV 0x30(%R12),%EAX |
0x43c8de TEST %EAX,%EAX |
0x43c8e0 MOV -0xb8(%RBP),%R15 |
0x43c8e7 JLE 43c945 |
0x43c8e9 XOR %EBX,%EBX |
0x43c8eb XOR %R14D,%R14D |
0x43c8ee JMP 43c8fc |
(854) 0x43c8f0 MOVSXD %EAX,%RCX |
(854) 0x43c8f3 ADD $0x18,%RBX |
(854) 0x43c8f7 CMP %RCX,%R14 |
(854) 0x43c8fa JGE 43c945 |
(854) 0x43c8fc MOV %R14D,%EDX |
(854) 0x43c8ff IMUL %EAX,%EDX |
(854) 0x43c902 INC %R14 |
(854) 0x43c905 MOV 0x40(%R12),%ECX |
(854) 0x43c90a IMUL %R14D,%ECX |
(854) 0x43c90e MOV 0x8(%R12),%ESI |
(854) 0x43c913 CMP %ECX,%ESI |
(854) 0x43c915 CMOVL %ESI,%ECX |
(854) 0x43c918 SUB %EDX,%ECX |
(854) 0x43c91a JLE 43c8f0 |
(854) 0x43c91c MOVSXD %EDX,%RDI |
(854) 0x43c91f SAL $0x3,%RDI |
(854) 0x43c923 ADD 0x18(%R15),%RDI |
(854) 0x43c927 MOV 0x310(%R12),%RAX |
(854) 0x43c92f MOV (%RAX,%RBX,1),%RSI |
(854) 0x43c933 MOV %ECX,%EDX |
(854) 0x43c935 SAL $0x3,%RDX |
(854) 0x43c939 CALL 4040a0 <memmove@plt> |
(854) 0x43c93e MOV 0x30(%R12),%EAX |
(854) 0x43c943 JMP 43c8f0 |
0x43c945 ADD $0x148,%RSP |
0x43c94c POP %RBX |
0x43c94d POP %R12 |
0x43c94f POP %R13 |
0x43c951 POP %R14 |
0x43c953 POP %R15 |
0x43c955 POP %RBP |
0x43c956 RET |
0x43c957 MOV %RAX,%RDI |
0x43c95a CALL 40d2b0 <__clang_call_terminate> |
0x43c95f NOP |
Path / |
Source file and lines | einspline_spo_ref.hpp:172-189 |
Module | exec |
nb instructions | 121 |
nb uops | 133 |
loop length | 580 |
used x86 registers | 12 |
used mmx registers | 0 |
used xmm registers | 9 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 11 |
ADD-SUB / MUL ratio | 1.50 |
micro-operation queue | 22.17 cycles |
front end | 22.17 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 11.20 | 11.17 | 11.33 | 11.33 | 11.00 | 11.23 | 11.20 | 11.00 | 11.00 | 11.00 | 11.20 | 11.33 |
cycles | 11.20 | 11.17 | 11.33 | 11.33 | 11.00 | 11.23 | 11.20 | 11.00 | 11.00 | 11.00 | 11.20 | 11.33 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 22.17-22.24 |
Stall cycles | 0.00 |
Front-end | 22.17 |
Dispatch | 11.33 |
Overall L1 | 22.17 |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 55% |
load | 21% |
store | 100% |
mul | 50% |
add-sub | 0% |
fma | 50% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 71% |
all | 34% |
load | 15% |
store | 30% |
mul | 50% |
add-sub | 0% |
fma | 50% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 45% |
all | 11% |
load | 11% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 9% |
all | 19% |
load | 15% |
store | 25% |
mul | 18% |
add-sub | 12% |
fma | 18% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 21% |
all | 16% |
load | 14% |
store | 16% |
mul | 18% |
add-sub | 12% |
fma | 18% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 17% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
SUB $0x148,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RCX,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %EDX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RSI,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDI,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x358(%RDI),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RDI,-0xc0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 48ab60 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE5startEv> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOVSXD %R12D,%RAX | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
LEA (%RAX,%RAX,2),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SAL $0x3,%RCX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
ADD 0x40(%R13),%RCX | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
CMP %EAX,0x124(%R13) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
LEA 0x128(%R13),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMOVNE %RCX,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0xf0(%R14),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVUPD (%RAX),%XMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMULSD 0xd8(%R14),%XMM1,%XMM3 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VFMADD231SD 0x8(%RAX),%XMM0,%XMM3 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMOVDDUP 0x10(%RAX),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VFMADD231SD 0x108(%R14),%XMM2,%XMM3 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMOVUPD 0xe0(%R14),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVSD 0xf8(%R14),%XMM4 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VSHUFPD $0x1,%XMM1,%XMM1,%XMM5 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VUNPCKLPD 0xe8(%R14),%XMM4,%XMM4 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 1 |
VMOVHPD 0x100(%R14),%XMM0,%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4-12 | 1 |
VMULPD %XMM5,%XMM4,%XMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD %XMM4,%XMM1,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD 0x110(%R14),%XMM2,%XMM0 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VXORPD %XMM1,%XMM1,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VCMPPD $0x1,%XMM1,%XMM3,%XMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVSD 0xbbdae(%RIP),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VCMPPD $0x1,%XMM3,%XMM2,%XMM5 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VANDPD %XMM4,%XMM5,%XMM4 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VMOVD %XMM4,%EAX | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VXORPD %XMM4,%XMM4,%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVUPD %XMM4,-0x110(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
TEST $0x1,%AL | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JNE 43c309 <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEE+0xf9> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VROUNDSD $0x9,%XMM3,%XMM3,%XMM4 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 1 |
VSUBSD %XMM4,%XMM3,%XMM3 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVUPD %XMM3,-0x110(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VXORPD %XMM3,%XMM3,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VCMPPD $0x1,%XMM3,%XMM0,%XMM3 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVMSKPD %XMM3,%EAX | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VROUNDSD $0x9,%XMM0,%XMM0,%XMM3 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 1 |
VSUBSD %XMM3,%XMM0,%XMM3 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VXORPD %XMM4,%XMM4,%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
TEST $0x1,%AL | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JNE 43c32c <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEE+0x11c> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VMOVAPD %XMM3,%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
MOV %RBX,-0xb8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VSHUFPD $0x1,%XMM0,%XMM0,%XMM5 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VROUNDSD $0x9,%XMM5,%XMM5,%XMM6 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 1 |
VSUBSD %XMM6,%XMM5,%XMM6 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
TEST $0x2,%AL | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JNE 43c34a <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEE+0x13a> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VMOVAPD %XMM6,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
MOV %R14,-0x80(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x30(%R14),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x88(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
TEST %EAX,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 43c8c9 <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEE+0x6b9> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VCMPSD $0x1,%XMM0,%XMM2,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VBLENDVPD %XMM0,%XMM4,%XMM3,%XMM0 | 3 | 1 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 2-3 | 1 |
VMOVUPD %XMM0,-0x130(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VCMPSD $0x1,%XMM5,%XMM2,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VBLENDVPD %XMM0,%XMM1,%XMM6,%XMM0 | 3 | 1 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 2-3 | 1 |
VMOVUPD %XMM0,-0x120(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
MOV -0x80(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x2f8(%RAX),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,-0xd0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x310(%RAX),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,-0xc8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVSXD 0x40(%RAX),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (,%R14,8),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP $0x1,%R14 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R14,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
ADC $0,%R15 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
DECQ -0x88(%RBP) | 3 | 0.20 | 0.20 | 0.33 | 0.33 | 0.50 | 0.20 | 0.20 | 0.50 | 0.50 | 0.50 | 0.20 | 0.33 | 1 | 0.50 |
MOV %R15,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SHR $0x1,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV %RAX,-0xf8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R15,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x2,%RBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVSD 0xbbcad(%RIP),%XMM10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0xb8c2d(%RIP),%XMM11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RDX,-0x90(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP 43c41f <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEE+0x20f> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0xc0(%RBP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CALL 48ad60 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE4stopEv> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV -0x80(%RBP),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x30(%R12),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
TEST %EAX,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
MOV -0xb8(%RBP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JLE 43c945 <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEE+0x735> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
XOR %EBX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %R14D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 43c8fc <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEE+0x6ec> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
ADD $0x148,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
MOV %RAX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 40d2b0 <__clang_call_terminate> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | einspline_spo_ref.hpp:172-189 |
Module | exec |
nb instructions | 121 |
nb uops | 133 |
loop length | 580 |
used x86 registers | 12 |
used mmx registers | 0 |
used xmm registers | 9 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 11 |
ADD-SUB / MUL ratio | 1.50 |
micro-operation queue | 22.17 cycles |
front end | 22.17 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 11.20 | 11.17 | 11.33 | 11.33 | 11.00 | 11.23 | 11.20 | 11.00 | 11.00 | 11.00 | 11.20 | 11.33 |
cycles | 11.20 | 11.17 | 11.33 | 11.33 | 11.00 | 11.23 | 11.20 | 11.00 | 11.00 | 11.00 | 11.20 | 11.33 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 22.17-22.24 |
Stall cycles | 0.00 |
Front-end | 22.17 |
Dispatch | 11.33 |
Overall L1 | 22.17 |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 55% |
load | 21% |
store | 100% |
mul | 50% |
add-sub | 0% |
fma | 50% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 71% |
all | 34% |
load | 15% |
store | 30% |
mul | 50% |
add-sub | 0% |
fma | 50% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 45% |
all | 11% |
load | 11% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 9% |
all | 19% |
load | 15% |
store | 25% |
mul | 18% |
add-sub | 12% |
fma | 18% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 21% |
all | 16% |
load | 14% |
store | 16% |
mul | 18% |
add-sub | 12% |
fma | 18% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 17% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
SUB $0x148,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RCX,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %EDX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RSI,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDI,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x358(%RDI),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RDI,-0xc0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 48ab60 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE5startEv> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOVSXD %R12D,%RAX | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
LEA (%RAX,%RAX,2),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SAL $0x3,%RCX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
ADD 0x40(%R13),%RCX | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
CMP %EAX,0x124(%R13) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
LEA 0x128(%R13),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMOVNE %RCX,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0xf0(%R14),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVUPD (%RAX),%XMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMULSD 0xd8(%R14),%XMM1,%XMM3 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VFMADD231SD 0x8(%RAX),%XMM0,%XMM3 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMOVDDUP 0x10(%RAX),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VFMADD231SD 0x108(%R14),%XMM2,%XMM3 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMOVUPD 0xe0(%R14),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVSD 0xf8(%R14),%XMM4 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VSHUFPD $0x1,%XMM1,%XMM1,%XMM5 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VUNPCKLPD 0xe8(%R14),%XMM4,%XMM4 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 1 |
VMOVHPD 0x100(%R14),%XMM0,%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4-12 | 1 |
VMULPD %XMM5,%XMM4,%XMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD %XMM4,%XMM1,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD 0x110(%R14),%XMM2,%XMM0 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VXORPD %XMM1,%XMM1,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VCMPPD $0x1,%XMM1,%XMM3,%XMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVSD 0xbbdae(%RIP),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VCMPPD $0x1,%XMM3,%XMM2,%XMM5 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VANDPD %XMM4,%XMM5,%XMM4 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VMOVD %XMM4,%EAX | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VXORPD %XMM4,%XMM4,%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVUPD %XMM4,-0x110(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
TEST $0x1,%AL | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JNE 43c309 <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEE+0xf9> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VROUNDSD $0x9,%XMM3,%XMM3,%XMM4 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 1 |
VSUBSD %XMM4,%XMM3,%XMM3 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVUPD %XMM3,-0x110(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VXORPD %XMM3,%XMM3,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VCMPPD $0x1,%XMM3,%XMM0,%XMM3 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVMSKPD %XMM3,%EAX | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VROUNDSD $0x9,%XMM0,%XMM0,%XMM3 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 1 |
VSUBSD %XMM3,%XMM0,%XMM3 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VXORPD %XMM4,%XMM4,%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
TEST $0x1,%AL | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JNE 43c32c <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEE+0x11c> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VMOVAPD %XMM3,%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
MOV %RBX,-0xb8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VSHUFPD $0x1,%XMM0,%XMM0,%XMM5 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VROUNDSD $0x9,%XMM5,%XMM5,%XMM6 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 1 |
VSUBSD %XMM6,%XMM5,%XMM6 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
TEST $0x2,%AL | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JNE 43c34a <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEE+0x13a> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VMOVAPD %XMM6,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
MOV %R14,-0x80(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x30(%R14),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x88(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
TEST %EAX,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 43c8c9 <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEE+0x6b9> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VCMPSD $0x1,%XMM0,%XMM2,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VBLENDVPD %XMM0,%XMM4,%XMM3,%XMM0 | 3 | 1 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 2-3 | 1 |
VMOVUPD %XMM0,-0x130(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VCMPSD $0x1,%XMM5,%XMM2,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VBLENDVPD %XMM0,%XMM1,%XMM6,%XMM0 | 3 | 1 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 2-3 | 1 |
VMOVUPD %XMM0,-0x120(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
MOV -0x80(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x2f8(%RAX),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,-0xd0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x310(%RAX),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,-0xc8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVSXD 0x40(%RAX),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (,%R14,8),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP $0x1,%R14 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R14,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
ADC $0,%R15 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
DECQ -0x88(%RBP) | 3 | 0.20 | 0.20 | 0.33 | 0.33 | 0.50 | 0.20 | 0.20 | 0.50 | 0.50 | 0.50 | 0.20 | 0.33 | 1 | 0.50 |
MOV %R15,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SHR $0x1,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV %RAX,-0xf8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R15,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x2,%RBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVSD 0xbbcad(%RIP),%XMM10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0xb8c2d(%RIP),%XMM11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RDX,-0x90(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP 43c41f <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEE+0x20f> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0xc0(%RBP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CALL 48ad60 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE4stopEv> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV -0x80(%RBP),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x30(%R12),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
TEST %EAX,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
MOV -0xb8(%RBP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JLE 43c945 <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEE+0x735> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
XOR %EBX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %R14D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 43c8fc <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEE+0x6ec> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
ADD $0x148,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
MOV %RAX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 40d2b0 <__clang_call_terminate> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEE– | 23.91 | 24.74 |
▼Loop 855 - MultiBsplineRef.hpp:42-71 - exec– | 0.01 | 0.01 |
▼Loop 856 - MultiBsplineRef.hpp:63-71 - exec– | 0 | 0 |
▼Loop 857 - MultiBsplineRef.hpp:64-71 - exec– | 0 | 0 |
○Loop 858 - MultiBsplineRef.hpp:68-70 - exec | 23.89 | 24.46 |
○Loop 854 - einspline_spo_ref.hpp:183-187 - exec | 0 | 0.01 |