Function: miniqmcreference::einspline_spo_ref<double>::evaluate(qmcplusplus::ParticleSet const&, int ... | Module: exec | Source: einspline_spo_ref.hpp:172-189 [...] | Coverage: 24.18% |
---|
Function: miniqmcreference::einspline_spo_ref<double>::evaluate(qmcplusplus::ParticleSet const&, int ... | Module: exec | Source: einspline_spo_ref.hpp:172-189 [...] | Coverage: 24.18% |
---|
/scratch_na/users/xoserete/qaas_runs/171-417-8059/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/TinyVectorTensorOps.h: 150 - 152 |
-------------------------------------------------------------------------------- |
150: return TinyVector<Type_t, 3>(lhs[0] * rhs[0] + lhs[1] * rhs[3] + lhs[2] * rhs[6], |
151: lhs[0] * rhs[1] + lhs[1] * rhs[4] + lhs[2] * rhs[7], |
152: lhs[0] * rhs[2] + lhs[1] * rhs[5] + lhs[2] * rhs[8]); |
/scratch_na/users/xoserete/qaas_runs/171-417-8059/intel/miniqmc/build/miniqmc/src/Particle/Lattice/CrystalLattice.h: 170 - 170 |
-------------------------------------------------------------------------------- |
170: if (-std::numeric_limits<T1>::epsilon() < val_dot[i] && val_dot[i] < 0) |
/usr/lib/gcc/x86_64-redhat-linux/8/../../../../include/c++/8/bits/stl_algobase.h: 200 - 696 |
-------------------------------------------------------------------------------- |
200: if (__b < __a) |
[...] |
366: const ptrdiff_t _Num = __last - __first; |
367: if (_Num) |
368: __builtin_memmove(__result, __first, sizeof(_Tp) * _Num); |
[...] |
695: for (; __first != __last; ++__first) |
696: *__first = __tmp; |
/scratch_na/users/xoserete/qaas_runs/171-417-8059/intel/miniqmc/build/miniqmc/src/Numerics/Spline2/MultiBsplineEvalHelper.hpp: 45 - 47 |
-------------------------------------------------------------------------------- |
45: T sf = std::floor(x); |
46: T dx2 = x - sf; |
47: int ind2 = std::min(std::max(0, static_cast<int>(sf)), nmax); |
/scratch_na/users/xoserete/qaas_runs/171-417-8059/intel/miniqmc/build/miniqmc/src/Utilities/NewTimer.h: 242 - 249 |
-------------------------------------------------------------------------------- |
242: ScopeGuard(TIMER& t) : timer(t) { timer.start(); } |
[...] |
249: ~ScopeGuard() { timer.stop(); } |
/scratch_na/users/xoserete/qaas_runs/171-417-8059/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/Tensor.h: 213 - 213 |
-------------------------------------------------------------------------------- |
213: inline Type_t operator[](unsigned int i) const { return X[i]; } |
/scratch_na/users/xoserete/qaas_runs/171-417-8059/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/TinyVector.h: 61 - 61 |
-------------------------------------------------------------------------------- |
61: for (size_t d = 0; d < D; ++d) |
/scratch_na/users/xoserete/qaas_runs/171-417-8059/intel/miniqmc/build/miniqmc/src/Numerics/Spline2/MultiBsplineRef.hpp: 42 - 71 |
-------------------------------------------------------------------------------- |
42: x -= spline_m->x_grid.start; |
43: y -= spline_m->y_grid.start; |
44: z -= spline_m->z_grid.start; |
45: T tx, ty, tz; |
46: int ix, iy, iz; |
47: spline2::getSplineBound(x * spline_m->x_grid.delta_inv, tx, ix, spline_m->x_grid.num - 1); |
48: spline2::getSplineBound(y * spline_m->y_grid.delta_inv, ty, iy, spline_m->y_grid.num - 1); |
49: spline2::getSplineBound(z * spline_m->z_grid.delta_inv, tz, iz, spline_m->z_grid.num - 1); |
[...] |
56: const intptr_t xs = spline_m->x_stride; |
57: const intptr_t ys = spline_m->y_stride; |
58: const intptr_t zs = spline_m->z_stride; |
59: |
60: constexpr T zero(0); |
61: std::fill(vals, vals + num_splines, zero); |
62: |
63: for (size_t i = 0; i < 4; i++) |
64: for (size_t j = 0; j < 4; j++) |
65: { |
66: const T pre00 = a[i] * b[j]; |
67: const T* restrict coefs = spline_m->coefs + (ix + i) * xs + (iy + j) * ys + iz * zs; |
68: for (size_t n = 0; n < num_splines; n++) |
69: vals[n] += pre00 * |
70: (c[0] * coefs[n] + c[1] * coefs[n + zs] + c[2] * coefs[n + 2 * zs] + |
71: c[3] * coefs[n + 3 * zs]); |
/usr/lib/gcc/x86_64-redhat-linux/8/../../../../include/c++/8/bits/stl_vector.h: 933 - 1056 |
-------------------------------------------------------------------------------- |
933: return *(this->_M_impl._M_start + __n); |
[...] |
1056: { return _M_data_ptr(this->_M_impl._M_start); } |
/scratch_na/users/xoserete/qaas_runs/171-417-8059/intel/miniqmc/build/miniqmc/src/Numerics/Spline2/MultiBsplineData.hpp: 54 - 57 |
-------------------------------------------------------------------------------- |
54: a[0] = ((A00 * tx + A01) * tx + A02) * tx + A03; |
55: a[1] = ((A10 * tx + A11) * tx + A12) * tx + A13; |
56: a[2] = ((A20 * tx + A21) * tx + A22) * tx + A23; |
57: a[3] = ((A30 * tx + A31) * tx + A32) * tx + A33; |
/scratch_na/users/xoserete/qaas_runs/171-417-8059/intel/miniqmc/build/miniqmc/src/Particle/ParticleSet.h: 143 - 143 |
-------------------------------------------------------------------------------- |
143: return (active_ptcl_ == iat) ? active_pos_ : R[iat]; |
/scratch_na/users/xoserete/qaas_runs/171-417-8059/intel/miniqmc/build/miniqmc/src/QMCWaveFunctions/einspline_spo_ref.hpp: 172 - 189 |
-------------------------------------------------------------------------------- |
172: ScopedTimer local_timer(timer); |
173: |
174: auto u = Lattice.toUnit_floor(P.activeR(iat)); |
175: for (int i = 0; i < nBlocks; ++i) |
176: MultiBsplineEvalRef::evaluate_v(einsplines[i], u[0], u[1], u[2], psi[i].data(), nSplinesPerBlock); |
177: } |
178: |
179: inline void evaluate(const ParticleSet& P, int iat, ValueVector_t& psi_v) |
180: { |
181: evaluate_v(P, iat); |
182: |
183: for (int i = 0; i < nBlocks; ++i) |
184: { |
185: // in real simulation, phase needs to be applied. Here just fake computation |
186: const int first = i * nBlocks; |
187: std::copy_n(psi[i].data(), std::min((i + 1) * nSplinesPerBlock, OrbitalSetSize) - first, psi_v.data() + first); |
188: } |
189: } |
/usr/lib/gcc/x86_64-redhat-linux/8/../../../../include/c++/8/bits/stl_algo.h: 782 - 782 |
-------------------------------------------------------------------------------- |
782: { return std::copy(__first, __first + __n, __result); } |
0x43aea0 PUSH %RBP |
0x43aea1 MOV %RSP,%RBP |
0x43aea4 PUSH %R15 |
0x43aea6 PUSH %R14 |
0x43aea8 PUSH %R13 |
0x43aeaa PUSH %R12 |
0x43aeac PUSH %RBX |
0x43aead SUB $0x148,%RSP |
0x43aeb4 MOV %RCX,%RBX |
0x43aeb7 MOV %EDX,%R12D |
0x43aeba MOV %RSI,%R13 |
0x43aebd MOV %RDI,%R14 |
0x43aec0 MOV 0x358(%RDI),%RDI |
0x43aec7 MOV %RDI,-0xb0(%RBP) |
0x43aece CALL 48b590 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE5startEv> |
0x43aed3 MOVSXD %R12D,%RAX |
0x43aed6 LEA (%RAX,%RAX,2),%RCX |
0x43aeda SAL $0x3,%RCX |
0x43aede ADD 0x40(%R13),%RCX |
0x43aee2 CMP %EAX,0x124(%R13) |
0x43aee9 LEA 0x128(%R13),%RAX |
0x43aef0 CMOVNE %RCX,%RAX |
0x43aef4 VMOVSD 0xf0(%R14),%XMM0 |
0x43aefd VMOVUPD (%RAX),%XMM1 |
0x43af01 VMULSD 0xd8(%R14),%XMM1,%XMM3 |
0x43af0a VMOVUPD 0xe0(%R14),%XMM2 |
0x43af13 VFMADD231SD 0x8(%RAX),%XMM0,%XMM3 |
0x43af19 VMOVDDUP 0x10(%RAX),%XMM4 |
0x43af1e VFMADD231SD 0x108(%R14),%XMM4,%XMM3 |
0x43af27 VMOVSD 0xf8(%R14),%XMM0 |
0x43af30 VSHUFPD $0x1,%XMM1,%XMM1,%XMM5 |
0x43af35 VUNPCKLPD 0xe8(%R14),%XMM0,%XMM6 |
0x43af3e VMOVHPD 0x100(%R14),%XMM2,%XMM0 |
0x43af47 VMULPD %XMM5,%XMM6,%XMM2 |
0x43af4b VFMADD213PD %XMM2,%XMM1,%XMM0 |
0x43af50 VFMADD231PD 0x110(%R14),%XMM4,%XMM0 |
0x43af59 VXORPD %XMM1,%XMM1,%XMM1 |
0x43af5d VCMPPD $0x1,%XMM1,%XMM3,%XMM4 |
0x43af62 VMOVSD 0xbec5e(%RIP),%XMM2 |
0x43af6a VCMPPD $0x1,%XMM3,%XMM2,%XMM5 |
0x43af6f VANDPD %XMM4,%XMM5,%XMM4 |
0x43af73 VMOVD %XMM4,%EAX |
0x43af77 VXORPD %XMM4,%XMM4,%XMM4 |
0x43af7b VMOVUPD %XMM4,-0x110(%RBP) |
0x43af83 TEST $0x1,%AL |
0x43af85 JNE 43af99 |
0x43af87 VROUNDSD $0x9,%XMM3,%XMM3,%XMM4 |
0x43af8d VSUBSD %XMM4,%XMM3,%XMM3 |
0x43af91 VMOVUPD %XMM3,-0x110(%RBP) |
0x43af99 VXORPD %XMM3,%XMM3,%XMM3 |
0x43af9d VCMPPD $0x1,%XMM3,%XMM0,%XMM3 |
0x43afa2 VMOVMSKPD %XMM3,%EAX |
0x43afa6 VROUNDSD $0x9,%XMM0,%XMM0,%XMM3 |
0x43afac VSUBSD %XMM3,%XMM0,%XMM3 |
0x43afb0 VXORPD %XMM4,%XMM4,%XMM4 |
0x43afb4 TEST $0x1,%AL |
0x43afb6 JNE 43afbc |
0x43afb8 VMOVAPD %XMM3,%XMM4 |
0x43afbc MOV %RBX,-0xa8(%RBP) |
0x43afc3 VSHUFPD $0x1,%XMM0,%XMM0,%XMM5 |
0x43afc8 VROUNDSD $0x9,%XMM5,%XMM5,%XMM6 |
0x43afce VSUBSD %XMM6,%XMM5,%XMM6 |
0x43afd2 TEST $0x2,%AL |
0x43afd4 JNE 43afda |
0x43afd6 VMOVAPD %XMM6,%XMM1 |
0x43afda MOV %R14,-0x68(%RBP) |
0x43afde MOV 0x30(%R14),%EAX |
0x43afe2 MOV %RAX,-0x70(%RBP) |
0x43afe6 TEST %EAX,%EAX |
0x43afe8 JLE 43b55a |
0x43afee VCMPSD $0x1,%XMM0,%XMM2,%XMM0 |
0x43aff3 VBLENDVPD %XMM0,%XMM4,%XMM3,%XMM0 |
0x43aff9 VMOVUPD %XMM0,-0x130(%RBP) |
0x43b001 VCMPSD $0x1,%XMM5,%XMM2,%XMM0 |
0x43b006 VBLENDVPD %XMM0,%XMM1,%XMM6,%XMM0 |
0x43b00c VMOVUPD %XMM0,-0x120(%RBP) |
0x43b014 MOV -0x68(%RBP),%RAX |
0x43b018 MOV 0x2f8(%RAX),%RCX |
0x43b01f MOV %RCX,-0xc8(%RBP) |
0x43b026 MOV 0x310(%RAX),%RCX |
0x43b02d MOV %RCX,-0xc0(%RBP) |
0x43b034 MOVSXD 0x40(%RAX),%R12 |
0x43b038 LEA (,%R12,8),%RAX |
0x43b040 MOV %RAX,-0xb8(%RBP) |
0x43b047 CMP $0x1,%R12 |
0x43b04b MOV %R12,%R15 |
0x43b04e ADC $0,%R15 |
0x43b052 DECQ -0x70(%RBP) |
0x43b056 MOV %R15,%RAX |
0x43b059 SHR $0x1,%RAX |
0x43b05c MOV %RAX,-0xf0(%RBP) |
0x43b063 MOV %R15,-0xf8(%RBP) |
0x43b06a AND $-0x2,%R15 |
0x43b06e XOR %ECX,%ECX |
0x43b070 VMOVSD 0xbeb58(%RIP),%XMM10 |
0x43b078 VMOVSD 0xbbb58(%RIP),%XMM11 |
0x43b080 JMP 43b0a5 |
0x43b082 NOPW %CS:(%RAX,%RAX,1) |
(843) 0x43b090 MOV -0xe8(%RBP),%RCX |
(843) 0x43b097 CMP -0x70(%RBP),%RCX |
(843) 0x43b09b LEA 0x1(%RCX),%RCX |
(843) 0x43b09f JE 43b55a |
(843) 0x43b0a5 MOV -0xc8(%RBP),%RAX |
(843) 0x43b0ac MOV (%RAX,%RCX,8),%R14 |
(843) 0x43b0b0 MOV %RCX,-0xe8(%RBP) |
(843) 0x43b0b7 LEA (%RCX,%RCX,2),%RAX |
(843) 0x43b0bb MOV -0xc0(%RBP),%RCX |
(843) 0x43b0c2 MOV (%RCX,%RAX,8),%R13 |
(843) 0x43b0c6 VMOVUPD -0x110(%RBP),%XMM0 |
(843) 0x43b0ce VSUBSD 0x28(%R14),%XMM0,%XMM0 |
(843) 0x43b0d4 VMOVUPD -0x130(%RBP),%XMM1 |
(843) 0x43b0dc VSUBSD 0x50(%R14),%XMM1,%XMM1 |
(843) 0x43b0e2 VMOVUPD -0x120(%RBP),%XMM2 |
(843) 0x43b0ea VSUBSD 0x78(%R14),%XMM2,%XMM2 |
(843) 0x43b0f0 VMULSD 0x48(%R14),%XMM0,%XMM0 |
(843) 0x43b0f6 MOVSXD 0x38(%R14),%RBX |
(843) 0x43b0fa VROUNDSD $0x9,%XMM0,%XMM0,%XMM14 |
(843) 0x43b100 VMULSD 0x70(%R14),%XMM1,%XMM1 |
(843) 0x43b106 VSUBSD %XMM14,%XMM0,%XMM3 |
(843) 0x43b10b MOVSXD 0x60(%R14),%R8 |
(843) 0x43b10f VMULSD %XMM3,%XMM10,%XMM4 |
(843) 0x43b113 VSUBSD %XMM4,%XMM11,%XMM0 |
(843) 0x43b117 VMULSD %XMM3,%XMM3,%XMM5 |
(843) 0x43b11b VMOVDDUP %XMM3,%XMM6 |
(843) 0x43b11f VMOVDDUP 0xbbab1(%RIP),%XMM8 |
(843) 0x43b127 VBLENDPD $0x1,%XMM0,%XMM8,%XMM0 |
(843) 0x43b12d VMULPD %XMM0,%XMM6,%XMM7 |
(843) 0x43b131 VMOVUPD 0xbe9e7(%RIP),%XMM9 |
(843) 0x43b139 VADDPD %XMM7,%XMM9,%XMM0 |
(843) 0x43b13d VUNPCKLPD %XMM5,%XMM6,%XMM6 |
(843) 0x43b141 VMOVUPD 0xbe9e7(%RIP),%XMM12 |
(843) 0x43b149 VFMADD213PD %XMM12,%XMM0,%XMM6 |
(843) 0x43b14e VROUNDSD $0x9,%XMM1,%XMM1,%XMM15 |
(843) 0x43b154 VSUBSD %XMM15,%XMM1,%XMM1 |
(843) 0x43b159 VMULSD 0x98(%R14),%XMM2,%XMM0 |
(843) 0x43b162 MOVSXD 0x88(%R14),%R10 |
(843) 0x43b169 VMOVUPD %XMM6,-0x170(%RBP) |
(843) 0x43b171 VSHUFPD $0x1,%XMM7,%XMM7,%XMM2 |
(843) 0x43b176 VSUBSD %XMM2,%XMM11,%XMM2 |
(843) 0x43b17a VFMADD213SD %XMM11,%XMM3,%XMM2 |
(843) 0x43b17f VFMADD213SD %XMM10,%XMM3,%XMM2 |
(843) 0x43b184 VMOVSD %XMM2,-0x160(%RBP) |
(843) 0x43b18c VMULSD %XMM5,%XMM4,%XMM2 |
(843) 0x43b190 VMOVSD %XMM2,-0x158(%RBP) |
(843) 0x43b198 VMULSD %XMM1,%XMM10,%XMM2 |
(843) 0x43b19c VSUBSD %XMM2,%XMM11,%XMM3 |
(843) 0x43b1a0 VMULSD %XMM1,%XMM1,%XMM4 |
(843) 0x43b1a4 VMOVDDUP %XMM1,%XMM5 |
(843) 0x43b1a8 VBLENDPD $0x1,%XMM3,%XMM8,%XMM3 |
(843) 0x43b1ae VMULPD %XMM3,%XMM5,%XMM3 |
(843) 0x43b1b2 VADDPD %XMM3,%XMM9,%XMM6 |
(843) 0x43b1b6 VUNPCKLPD %XMM4,%XMM5,%XMM5 |
(843) 0x43b1ba VFMADD213PD %XMM12,%XMM6,%XMM5 |
(843) 0x43b1bf VMOVUPD %XMM5,-0x150(%RBP) |
(843) 0x43b1c7 VSHUFPD $0x1,%XMM3,%XMM3,%XMM3 |
(843) 0x43b1cc VSUBSD %XMM3,%XMM11,%XMM3 |
(843) 0x43b1d0 VFMADD213SD %XMM11,%XMM1,%XMM3 |
(843) 0x43b1d5 VFMADD213SD %XMM10,%XMM1,%XMM3 |
(843) 0x43b1da VMOVSD %XMM3,-0x140(%RBP) |
(843) 0x43b1e2 VROUNDSD $0x9,%XMM0,%XMM0,%XMM3 |
(843) 0x43b1e8 VMULSD %XMM4,%XMM2,%XMM1 |
(843) 0x43b1ec VMOVSD %XMM1,-0x138(%RBP) |
(843) 0x43b1f4 VSUBSD %XMM3,%XMM0,%XMM2 |
(843) 0x43b1f8 VMULSD %XMM2,%XMM10,%XMM1 |
(843) 0x43b1fc VSUBSD %XMM1,%XMM11,%XMM12 |
(843) 0x43b200 VFMADD213SD 0xbb9e7(%RIP),%XMM2,%XMM12 |
(843) 0x43b209 VMULSD %XMM2,%XMM11,%XMM0 |
(843) 0x43b20d VADDSD 0xbe9c3(%RIP),%XMM0,%XMM13 |
(843) 0x43b215 VMULSD %XMM2,%XMM2,%XMM4 |
(843) 0x43b219 VFMADD213SD 0xbe9be(%RIP),%XMM4,%XMM13 |
(843) 0x43b222 MOV 0x10(%R14),%RAX |
(843) 0x43b226 MOV %RAX,-0x78(%RBP) |
(843) 0x43b22a MOV 0x18(%R14),%RAX |
(843) 0x43b22e MOV %RAX,-0x80(%RBP) |
(843) 0x43b232 MOV 0x20(%R14),%R11 |
(843) 0x43b236 MOV $0x1fffffffffffffff,%RAX |
(843) 0x43b240 TEST %RAX,%R12 |
(843) 0x43b243 JE 43b2fa |
(843) 0x43b249 MOV %R13,%RDI |
(843) 0x43b24c XOR %ESI,%ESI |
(843) 0x43b24e MOV -0xb8(%RBP),%RDX |
(843) 0x43b255 VMOVUPD %XMM12,-0xa0(%RBP) |
(843) 0x43b25d VMOVUPD %XMM13,-0x90(%RBP) |
(843) 0x43b265 MOV %R10,-0x60(%RBP) |
(843) 0x43b269 MOV %R8,-0x58(%RBP) |
(843) 0x43b26d MOV %R11,-0x50(%RBP) |
(843) 0x43b271 VMOVSD %XMM14,-0x48(%RBP) |
(843) 0x43b276 VMOVSD %XMM15,-0x40(%RBP) |
(843) 0x43b27b VMOVSD %XMM1,-0x38(%RBP) |
(843) 0x43b280 VMOVSD %XMM2,-0x30(%RBP) |
(843) 0x43b285 VMOVSD %XMM3,-0xe0(%RBP) |
(843) 0x43b28d VMOVSD %XMM4,-0xd8(%RBP) |
(843) 0x43b295 VMOVSD %XMM0,-0xd0(%RBP) |
(843) 0x43b29d CALL 4e7e80 <_intel_fast_memset> |
(843) 0x43b2a2 VMOVSD -0xd0(%RBP),%XMM0 |
(843) 0x43b2aa VMOVSD -0xd8(%RBP),%XMM4 |
(843) 0x43b2b2 VMOVSD -0xe0(%RBP),%XMM3 |
(843) 0x43b2ba VMOVSD -0x30(%RBP),%XMM2 |
(843) 0x43b2bf VMOVSD -0x38(%RBP),%XMM1 |
(843) 0x43b2c4 VMOVSD -0x40(%RBP),%XMM15 |
(843) 0x43b2c9 VMOVSD -0x48(%RBP),%XMM14 |
(843) 0x43b2ce MOV -0x50(%RBP),%R11 |
(843) 0x43b2d2 MOV -0x58(%RBP),%R8 |
(843) 0x43b2d6 MOV -0x60(%RBP),%R10 |
(843) 0x43b2da VMOVUPD -0x90(%RBP),%XMM13 |
(843) 0x43b2e2 VMOVUPD -0xa0(%RBP),%XMM12 |
(843) 0x43b2ea VMOVSD 0xbb8e6(%RIP),%XMM11 |
(843) 0x43b2f2 VMOVSD 0xbe8d6(%RIP),%XMM10 |
(843) 0x43b2fa VCVTTSD2SI %XMM14,%EDX |
(843) 0x43b2ff VCVTTSD2SI %XMM15,%ESI |
(843) 0x43b304 VCVTTSD2SI %XMM3,%ECX |
(843) 0x43b308 VFMADD213SD %XMM10,%XMM2,%XMM12 |
(843) 0x43b30d VSUBSD %XMM0,%XMM11,%XMM0 |
(843) 0x43b311 VFMADD213SD %XMM11,%XMM2,%XMM0 |
(843) 0x43b316 VFMADD213SD %XMM10,%XMM2,%XMM0 |
(843) 0x43b31b MOV 0x8(%R14),%RAX |
(843) 0x43b31f VMULSD %XMM4,%XMM1,%XMM1 |
(843) 0x43b323 DEC %R10 |
(843) 0x43b326 MOV %ECX,%EDI |
(843) 0x43b328 SAR $0x1f,%EDI |
(843) 0x43b32b ANDN %ECX,%EDI,%ECX |
(843) 0x43b330 CMP %RCX,%R10 |
(843) 0x43b333 CMOVGE %RCX,%R10 |
(843) 0x43b337 MOV %R11,%RCX |
(843) 0x43b33a IMUL %R10,%RCX |
(843) 0x43b33e DEC %R8 |
(843) 0x43b341 MOV %ESI,%EDI |
(843) 0x43b343 SAR $0x1f,%EDI |
(843) 0x43b346 ANDN %ESI,%EDI,%ESI |
(843) 0x43b34b CMP %RSI,%R8 |
(843) 0x43b34e CMOVGE %RSI,%R8 |
(843) 0x43b352 MOV -0x80(%RBP),%RDI |
(843) 0x43b356 IMUL %RDI,%R8 |
(843) 0x43b35a DEC %RBX |
(843) 0x43b35d MOV %EDX,%ESI |
(843) 0x43b35f SAR $0x1f,%ESI |
(843) 0x43b362 ANDN %EDX,%ESI,%EDX |
(843) 0x43b367 CMP %RDX,%RBX |
(843) 0x43b36a CMOVGE %RDX,%RBX |
(843) 0x43b36e MOV -0x78(%RBP),%RSI |
(843) 0x43b372 IMUL %RSI,%RBX |
(843) 0x43b376 ADD %R8,%RBX |
(843) 0x43b379 LEA (%R11,%R11,2),%R8 |
(843) 0x43b37d VMOVDDUP %XMM13,%XMM2 |
(843) 0x43b382 VMOVDDUP %XMM12,%XMM3 |
(843) 0x43b387 VMOVDDUP %XMM0,%XMM4 |
(843) 0x43b38b VMOVDDUP %XMM1,%XMM5 |
(843) 0x43b38f LEA (%RBX,%RCX,1),%RDX |
(843) 0x43b393 LEA (%R11,%RCX,1),%R9 |
(843) 0x43b397 ADD %RBX,%R9 |
(843) 0x43b39a ADD %R15,%R9 |
(843) 0x43b39d MOV %R9,-0x90(%RBP) |
(843) 0x43b3a4 LEA (%RCX,%R11,2),%R9 |
(843) 0x43b3a8 ADD %RBX,%R9 |
(843) 0x43b3ab ADD %R15,%R9 |
(843) 0x43b3ae MOV %R9,-0x60(%RBP) |
(843) 0x43b3b2 ADD %RCX,%R8 |
(843) 0x43b3b5 ADD %RBX,%R8 |
(843) 0x43b3b8 ADD %R15,%R8 |
(843) 0x43b3bb MOV %R8,-0xa0(%RBP) |
(843) 0x43b3c2 LEA 0x3(%R10),%RCX |
(843) 0x43b3c6 IMUL %R11,%RCX |
(843) 0x43b3ca ADD %RBX,%RCX |
(843) 0x43b3cd LEA (%RAX,%RCX,8),%R8 |
(843) 0x43b3d1 LEA (,%RSI,8),%RCX |
(843) 0x43b3d9 MOV %RCX,-0x30(%RBP) |
(843) 0x43b3dd LEA (,%RDI,8),%R9 |
(843) 0x43b3e5 LEA 0x2(%R10),%RCX |
(843) 0x43b3e9 IMUL %R11,%RCX |
(843) 0x43b3ed ADD %RBX,%RCX |
(843) 0x43b3f0 LEA (%RAX,%RCX,8),%RSI |
(843) 0x43b3f4 INC %R10 |
(843) 0x43b3f7 IMUL %R11,%R10 |
(843) 0x43b3fb ADD %RBX,%R10 |
(843) 0x43b3fe LEA (%RAX,%R10,8),%R11 |
(843) 0x43b402 LEA (%RAX,%RDX,8),%RCX |
(843) 0x43b406 LEA (%RDX,%R15,1),%RDI |
(843) 0x43b40a MOV %R8,%RDX |
(843) 0x43b40d XOR %R8D,%R8D |
(843) 0x43b410 JMP 43b455 |
0x43b412 NOPW %CS:(%RAX,%RAX,1) |
(844) 0x43b420 MOV -0x58(%RBP),%RDX |
(844) 0x43b424 MOV -0x30(%RBP),%RCX |
(844) 0x43b428 ADD %RCX,%RDX |
(844) 0x43b42b MOV -0x50(%RBP),%RSI |
(844) 0x43b42f ADD %RCX,%RSI |
(844) 0x43b432 MOV -0x48(%RBP),%R11 |
(844) 0x43b436 ADD %RCX,%R11 |
(844) 0x43b439 MOV -0x40(%RBP),%R8 |
(844) 0x43b43d ADD %RCX,%R8 |
(844) 0x43b440 MOV %R8,%RCX |
(844) 0x43b443 MOV -0x38(%RBP),%R8 |
(844) 0x43b447 CMP $0x3,%R8 |
(844) 0x43b44b LEA 0x1(%R8),%R8 |
(844) 0x43b44f JE 43b090 |
(844) 0x43b455 VMOVSD -0x170(%RBP,%R8,8),%XMM6 |
(844) 0x43b45f MOV -0x78(%RBP),%R10 |
(844) 0x43b463 MOV %R8,-0x38(%RBP) |
(844) 0x43b467 IMUL %R8,%R10 |
(844) 0x43b46b MOV %RCX,-0x40(%RBP) |
(844) 0x43b46f MOV %RCX,%R8 |
(844) 0x43b472 MOV %R11,-0x48(%RBP) |
(844) 0x43b476 MOV %RSI,-0x50(%RBP) |
(844) 0x43b47a MOV %RSI,%RCX |
(844) 0x43b47d MOV %RDX,-0x58(%RBP) |
(844) 0x43b481 XOR %ESI,%ESI |
(844) 0x43b483 JMP 43b4aa |
0x43b485 NOPW %CS:(%RAX,%RAX,1) |
(845) 0x43b490 ADD %R9,%RDX |
(845) 0x43b493 ADD %R9,%RCX |
(845) 0x43b496 ADD %R9,%R11 |
(845) 0x43b499 ADD %R9,%R8 |
(845) 0x43b49c CMP $0x3,%RSI |
(845) 0x43b4a0 LEA 0x1(%RSI),%RSI |
(845) 0x43b4a4 JE 43b420 |
(845) 0x43b4aa TEST %R12D,%R12D |
(845) 0x43b4ad JE 43b490 |
(845) 0x43b4af VMULSD -0x150(%RBP,%RSI,8),%XMM6,%XMM7 |
(845) 0x43b4b8 CMP $0x1,%R12D |
(845) 0x43b4bc JE 43b4ff |
(845) 0x43b4be VMOVDDUP %XMM7,%XMM8 |
(845) 0x43b4c2 MOV -0xf0(%RBP),%RBX |
(845) 0x43b4c9 XOR %R14D,%R14D |
(845) 0x43b4cc NOPL (%RAX) |
(846) 0x43b4d0 VMULPD (%R8,%R14,1),%XMM3,%XMM9 |
(846) 0x43b4d6 VFMADD231PD (%R11,%R14,1),%XMM2,%XMM9 |
(846) 0x43b4dc VFMADD231PD (%RCX,%R14,1),%XMM4,%XMM9 |
(846) 0x43b4e2 VFMADD231PD (%RDX,%R14,1),%XMM5,%XMM9 |
(846) 0x43b4e8 VFMADD213PD (%R13,%R14,1),%XMM8,%XMM9 |
(846) 0x43b4ef VMOVUPD %XMM9,(%R13,%R14,1) |
(846) 0x43b4f6 ADD $0x10,%R14 |
(846) 0x43b4fa DEC %RBX |
(846) 0x43b4fd JNE 43b4d0 |
(845) 0x43b4ff CMP -0xf8(%RBP),%R15 |
(845) 0x43b506 JE 43b490 |
(845) 0x43b508 MOV -0x80(%RBP),%RBX |
(845) 0x43b50c IMUL %RSI,%RBX |
(845) 0x43b510 ADD %R10,%RBX |
(845) 0x43b513 LEA (%RDI,%RBX,1),%R14 |
(845) 0x43b517 VMULSD (%RAX,%R14,8),%XMM12,%XMM8 |
(845) 0x43b51d MOV -0x90(%RBP),%R14 |
(845) 0x43b524 ADD %RBX,%R14 |
(845) 0x43b527 VFMADD231SD (%RAX,%R14,8),%XMM13,%XMM8 |
(845) 0x43b52d MOV -0x60(%RBP),%R14 |
(845) 0x43b531 ADD %RBX,%R14 |
(845) 0x43b534 VFMADD231SD (%RAX,%R14,8),%XMM0,%XMM8 |
(845) 0x43b53a ADD -0xa0(%RBP),%RBX |
(845) 0x43b541 VFMADD231SD (%RAX,%RBX,8),%XMM1,%XMM8 |
(845) 0x43b547 VFMADD213SD (%R13,%R15,8),%XMM7,%XMM8 |
(845) 0x43b54e VMOVSD %XMM8,(%R13,%R15,8) |
(845) 0x43b555 JMP 43b490 |
0x43b55a MOV -0xb0(%RBP),%RDI |
0x43b561 CALL 48b780 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE4stopEv> |
0x43b566 MOV -0x68(%RBP),%R12 |
0x43b56a MOV 0x30(%R12),%EAX |
0x43b56f TEST %EAX,%EAX |
0x43b571 MOV -0xa8(%RBP),%R15 |
0x43b578 JLE 43b5f3 |
0x43b57a XOR %EBX,%EBX |
0x43b57c XOR %R14D,%R14D |
0x43b57f JMP 43b59c |
0x43b581 NOPW %CS:(%RAX,%RAX,1) |
(842) 0x43b590 MOVSXD %EAX,%RCX |
(842) 0x43b593 ADD $0x18,%RBX |
(842) 0x43b597 CMP %RCX,%R14 |
(842) 0x43b59a JGE 43b5f3 |
(842) 0x43b59c MOV %R14D,%ECX |
(842) 0x43b59f IMUL %EAX,%ECX |
(842) 0x43b5a2 INC %R14 |
(842) 0x43b5a5 MOV 0x40(%R12),%EDX |
(842) 0x43b5aa IMUL %R14D,%EDX |
(842) 0x43b5ae MOV 0x8(%R12),%ESI |
(842) 0x43b5b3 CMP %EDX,%ESI |
(842) 0x43b5b5 CMOVL %ESI,%EDX |
(842) 0x43b5b8 SUB %ECX,%EDX |
(842) 0x43b5ba MOVSXD %EDX,%RDX |
(842) 0x43b5bd MOV $0x1fffffffffffffff,%RSI |
(842) 0x43b5c7 TEST %RSI,%RDX |
(842) 0x43b5ca JE 43b590 |
(842) 0x43b5cc MOV 0x310(%R12),%RAX |
(842) 0x43b5d4 MOV (%RAX,%RBX,1),%RSI |
(842) 0x43b5d8 SAL $0x3,%RDX |
(842) 0x43b5dc MOVSXD %ECX,%RDI |
(842) 0x43b5df SAL $0x3,%RDI |
(842) 0x43b5e3 ADD 0x18(%R15),%RDI |
(842) 0x43b5e7 CALL 403750 <memmove@plt> |
(842) 0x43b5ec MOV 0x30(%R12),%EAX |
(842) 0x43b5f1 JMP 43b590 |
0x43b5f3 ADD $0x148,%RSP |
0x43b5fa POP %RBX |
0x43b5fb POP %R12 |
0x43b5fd POP %R13 |
0x43b5ff POP %R14 |
0x43b601 POP %R15 |
0x43b603 POP %RBP |
0x43b604 RET |
0x43b605 MOV %RAX,%RDI |
0x43b608 CALL 40cee0 <__clang_call_terminate> |
0x43b60d NOPL (%RAX) |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►100.00+ | qmcplusplus::SPOSet::evaluateD[...] | OhmmsVector.h:178 | exec |
○ | miniqmcreference::DiracDetermi[...] | DiracDeterminantRef.cpp:217 | exec |
○ | qmcplusplus::WaveFunction::eva[...] | stl_vector.h:806 | exec |
○ | qmcplusplus::NonLocalPP<double[...] | NonLocalPP.hpp:135 | exec |
○ | main.extracted.110 | NewTimer.h:249 | exec |
○ | __kmp_invoke_microtask | libiomp5.so | |
○ | __kmp_invoke_task_func | libiomp5.so |
Path / |
Source file and lines | einspline_spo_ref.hpp:172-189 |
Module | exec |
nb instructions | 122 |
nb uops | 134 |
loop length | 604 |
used x86 registers | 12 |
used mmx registers | 0 |
used xmm registers | 9 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 12 |
ADD-SUB / MUL ratio | 1.50 |
micro-operation queue | 22.33 cycles |
front end | 22.33 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 11.20 | 11.17 | 11.33 | 11.33 | 11.50 | 11.23 | 11.20 | 11.50 | 11.50 | 11.50 | 11.20 | 11.33 |
cycles | 11.20 | 11.17 | 11.33 | 11.33 | 11.50 | 11.23 | 11.20 | 11.50 | 11.50 | 11.50 | 11.20 | 11.33 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 22.35-22.40 |
Stall cycles | 0.00 |
Front-end | 22.33 |
Dispatch | 11.50 |
Overall L1 | 22.33 |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 55% |
load | 21% |
store | 100% |
mul | 50% |
add-sub | 0% |
fma | 50% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 71% |
all | 34% |
load | 15% |
store | 28% |
mul | 50% |
add-sub | 0% |
fma | 50% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 46% |
all | 11% |
load | 11% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 9% |
all | 19% |
load | 15% |
store | 25% |
mul | 18% |
add-sub | 12% |
fma | 18% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 21% |
all | 16% |
load | 14% |
store | 16% |
mul | 18% |
add-sub | 12% |
fma | 18% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 17% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
SUB $0x148,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RCX,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %EDX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RSI,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDI,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x358(%RDI),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RDI,-0xb0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 48b590 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE5startEv> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOVSXD %R12D,%RAX | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
LEA (%RAX,%RAX,2),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SAL $0x3,%RCX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
ADD 0x40(%R13),%RCX | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
CMP %EAX,0x124(%R13) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
LEA 0x128(%R13),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMOVNE %RCX,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0xf0(%R14),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVUPD (%RAX),%XMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMULSD 0xd8(%R14),%XMM1,%XMM3 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMOVUPD 0xe0(%R14),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VFMADD231SD 0x8(%RAX),%XMM0,%XMM3 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMOVDDUP 0x10(%RAX),%XMM4 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VFMADD231SD 0x108(%R14),%XMM4,%XMM3 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMOVSD 0xf8(%R14),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VSHUFPD $0x1,%XMM1,%XMM1,%XMM5 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VUNPCKLPD 0xe8(%R14),%XMM0,%XMM6 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 1 |
VMOVHPD 0x100(%R14),%XMM2,%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4-12 | 1 |
VMULPD %XMM5,%XMM6,%XMM2 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD %XMM2,%XMM1,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD 0x110(%R14),%XMM4,%XMM0 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VXORPD %XMM1,%XMM1,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VCMPPD $0x1,%XMM1,%XMM3,%XMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVSD 0xbec5e(%RIP),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VCMPPD $0x1,%XMM3,%XMM2,%XMM5 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VANDPD %XMM4,%XMM5,%XMM4 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VMOVD %XMM4,%EAX | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VXORPD %XMM4,%XMM4,%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVUPD %XMM4,-0x110(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
TEST $0x1,%AL | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JNE 43af99 <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEE+0xf9> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VROUNDSD $0x9,%XMM3,%XMM3,%XMM4 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 1 |
VSUBSD %XMM4,%XMM3,%XMM3 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVUPD %XMM3,-0x110(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VXORPD %XMM3,%XMM3,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VCMPPD $0x1,%XMM3,%XMM0,%XMM3 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVMSKPD %XMM3,%EAX | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VROUNDSD $0x9,%XMM0,%XMM0,%XMM3 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 1 |
VSUBSD %XMM3,%XMM0,%XMM3 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VXORPD %XMM4,%XMM4,%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
TEST $0x1,%AL | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JNE 43afbc <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEE+0x11c> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VMOVAPD %XMM3,%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
MOV %RBX,-0xa8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VSHUFPD $0x1,%XMM0,%XMM0,%XMM5 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VROUNDSD $0x9,%XMM5,%XMM5,%XMM6 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 1 |
VSUBSD %XMM6,%XMM5,%XMM6 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
TEST $0x2,%AL | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JNE 43afda <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEE+0x13a> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VMOVAPD %XMM6,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
MOV %R14,-0x68(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x30(%R14),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x70(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
TEST %EAX,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 43b55a <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEE+0x6ba> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VCMPSD $0x1,%XMM0,%XMM2,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VBLENDVPD %XMM0,%XMM4,%XMM3,%XMM0 | 3 | 1 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 2-3 | 1 |
VMOVUPD %XMM0,-0x130(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VCMPSD $0x1,%XMM5,%XMM2,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VBLENDVPD %XMM0,%XMM1,%XMM6,%XMM0 | 3 | 1 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 2-3 | 1 |
VMOVUPD %XMM0,-0x120(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
MOV -0x68(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x2f8(%RAX),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,-0xc8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x310(%RAX),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,-0xc0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVSXD 0x40(%RAX),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (,%R12,8),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RAX,-0xb8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP $0x1,%R12 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R12,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
ADC $0,%R15 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
DECQ -0x70(%RBP) | 3 | 0.20 | 0.20 | 0.33 | 0.33 | 0.50 | 0.20 | 0.20 | 0.50 | 0.50 | 0.50 | 0.20 | 0.33 | 1 | 0.50 |
MOV %R15,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SHR $0x1,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV %RAX,-0xf0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R15,-0xf8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
AND $-0x2,%R15 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVSD 0xbeb58(%RIP),%XMM10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0xbbb58(%RIP),%XMM11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JMP 43b0a5 <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEE+0x205> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0xb0(%RBP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CALL 48b780 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE4stopEv> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV -0x68(%RBP),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x30(%R12),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
TEST %EAX,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
MOV -0xa8(%RBP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JLE 43b5f3 <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEE+0x753> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
XOR %EBX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %R14D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 43b59c <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEE+0x6fc> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD $0x148,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
MOV %RAX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 40cee0 <__clang_call_terminate> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | einspline_spo_ref.hpp:172-189 |
Module | exec |
nb instructions | 122 |
nb uops | 134 |
loop length | 604 |
used x86 registers | 12 |
used mmx registers | 0 |
used xmm registers | 9 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 12 |
ADD-SUB / MUL ratio | 1.50 |
micro-operation queue | 22.33 cycles |
front end | 22.33 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 11.20 | 11.17 | 11.33 | 11.33 | 11.50 | 11.23 | 11.20 | 11.50 | 11.50 | 11.50 | 11.20 | 11.33 |
cycles | 11.20 | 11.17 | 11.33 | 11.33 | 11.50 | 11.23 | 11.20 | 11.50 | 11.50 | 11.50 | 11.20 | 11.33 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 22.35-22.40 |
Stall cycles | 0.00 |
Front-end | 22.33 |
Dispatch | 11.50 |
Overall L1 | 22.33 |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 55% |
load | 21% |
store | 100% |
mul | 50% |
add-sub | 0% |
fma | 50% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 71% |
all | 34% |
load | 15% |
store | 28% |
mul | 50% |
add-sub | 0% |
fma | 50% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 46% |
all | 11% |
load | 11% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 9% |
all | 19% |
load | 15% |
store | 25% |
mul | 18% |
add-sub | 12% |
fma | 18% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 21% |
all | 16% |
load | 14% |
store | 16% |
mul | 18% |
add-sub | 12% |
fma | 18% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 17% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
SUB $0x148,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RCX,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %EDX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RSI,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDI,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x358(%RDI),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RDI,-0xb0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 48b590 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE5startEv> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOVSXD %R12D,%RAX | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
LEA (%RAX,%RAX,2),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SAL $0x3,%RCX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
ADD 0x40(%R13),%RCX | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
CMP %EAX,0x124(%R13) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
LEA 0x128(%R13),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMOVNE %RCX,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0xf0(%R14),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVUPD (%RAX),%XMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMULSD 0xd8(%R14),%XMM1,%XMM3 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMOVUPD 0xe0(%R14),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VFMADD231SD 0x8(%RAX),%XMM0,%XMM3 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMOVDDUP 0x10(%RAX),%XMM4 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VFMADD231SD 0x108(%R14),%XMM4,%XMM3 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMOVSD 0xf8(%R14),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VSHUFPD $0x1,%XMM1,%XMM1,%XMM5 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VUNPCKLPD 0xe8(%R14),%XMM0,%XMM6 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 1 |
VMOVHPD 0x100(%R14),%XMM2,%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4-12 | 1 |
VMULPD %XMM5,%XMM6,%XMM2 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD %XMM2,%XMM1,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD 0x110(%R14),%XMM4,%XMM0 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VXORPD %XMM1,%XMM1,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VCMPPD $0x1,%XMM1,%XMM3,%XMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVSD 0xbec5e(%RIP),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VCMPPD $0x1,%XMM3,%XMM2,%XMM5 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VANDPD %XMM4,%XMM5,%XMM4 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VMOVD %XMM4,%EAX | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VXORPD %XMM4,%XMM4,%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVUPD %XMM4,-0x110(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
TEST $0x1,%AL | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JNE 43af99 <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEE+0xf9> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VROUNDSD $0x9,%XMM3,%XMM3,%XMM4 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 1 |
VSUBSD %XMM4,%XMM3,%XMM3 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVUPD %XMM3,-0x110(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VXORPD %XMM3,%XMM3,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VCMPPD $0x1,%XMM3,%XMM0,%XMM3 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVMSKPD %XMM3,%EAX | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VROUNDSD $0x9,%XMM0,%XMM0,%XMM3 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 1 |
VSUBSD %XMM3,%XMM0,%XMM3 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VXORPD %XMM4,%XMM4,%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
TEST $0x1,%AL | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JNE 43afbc <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEE+0x11c> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VMOVAPD %XMM3,%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
MOV %RBX,-0xa8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VSHUFPD $0x1,%XMM0,%XMM0,%XMM5 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VROUNDSD $0x9,%XMM5,%XMM5,%XMM6 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 1 |
VSUBSD %XMM6,%XMM5,%XMM6 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
TEST $0x2,%AL | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JNE 43afda <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEE+0x13a> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VMOVAPD %XMM6,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
MOV %R14,-0x68(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x30(%R14),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x70(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
TEST %EAX,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 43b55a <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEE+0x6ba> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VCMPSD $0x1,%XMM0,%XMM2,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VBLENDVPD %XMM0,%XMM4,%XMM3,%XMM0 | 3 | 1 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 2-3 | 1 |
VMOVUPD %XMM0,-0x130(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VCMPSD $0x1,%XMM5,%XMM2,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VBLENDVPD %XMM0,%XMM1,%XMM6,%XMM0 | 3 | 1 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 2-3 | 1 |
VMOVUPD %XMM0,-0x120(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
MOV -0x68(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x2f8(%RAX),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,-0xc8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x310(%RAX),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,-0xc0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVSXD 0x40(%RAX),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (,%R12,8),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RAX,-0xb8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP $0x1,%R12 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R12,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
ADC $0,%R15 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
DECQ -0x70(%RBP) | 3 | 0.20 | 0.20 | 0.33 | 0.33 | 0.50 | 0.20 | 0.20 | 0.50 | 0.50 | 0.50 | 0.20 | 0.33 | 1 | 0.50 |
MOV %R15,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SHR $0x1,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV %RAX,-0xf0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R15,-0xf8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
AND $-0x2,%R15 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVSD 0xbeb58(%RIP),%XMM10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0xbbb58(%RIP),%XMM11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JMP 43b0a5 <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEE+0x205> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0xb0(%RBP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CALL 48b780 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE4stopEv> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV -0x68(%RBP),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x30(%R12),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
TEST %EAX,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
MOV -0xa8(%RBP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JLE 43b5f3 <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEE+0x753> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
XOR %EBX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %R14D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 43b59c <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEE+0x6fc> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD $0x148,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
MOV %RAX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 40cee0 <__clang_call_terminate> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼miniqmcreference::einspline_spo_ref | 24.18 | 27.29 |
▼Loop 843 - MultiBsplineRef.hpp:42-71 - exec– | 0.01 | 0.01 |
▼Loop 844 - MultiBsplineRef.hpp:63-71 - exec– | 0 | 0 |
▼Loop 845 - MultiBsplineRef.hpp:64-71 - exec– | 0 | 0 |
○Loop 846 - MultiBsplineRef.hpp:68-70 - exec | 24.14 | 26.87 |
○Loop 842 - einspline_spo_ref.hpp:183-187 - exec | 0 | 0.01 |