Function: miniqmcreference::einspline_spo_ref<double>::evaluate(qmcplusplus::ParticleSet const&, int ... | Module: exec | Source: einspline_spo_ref.hpp:172-189 [...] | Coverage: 23.13% |
---|
Function: miniqmcreference::einspline_spo_ref<double>::evaluate(qmcplusplus::ParticleSet const&, int ... | Module: exec | Source: einspline_spo_ref.hpp:172-189 [...] | Coverage: 23.13% |
---|
/scratch_na/users/xoserete/qaas_runs/171-284-5201/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/TinyVectorTensorOps.h: 150 - 152 |
-------------------------------------------------------------------------------- |
150: return TinyVector<Type_t, 3>(lhs[0] * rhs[0] + lhs[1] * rhs[3] + lhs[2] * rhs[6], |
151: lhs[0] * rhs[1] + lhs[1] * rhs[4] + lhs[2] * rhs[7], |
152: lhs[0] * rhs[2] + lhs[1] * rhs[5] + lhs[2] * rhs[8]); |
/scratch_na/users/xoserete/qaas_runs/171-284-5201/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/Tensor.h: 213 - 213 |
-------------------------------------------------------------------------------- |
213: inline Type_t operator[](unsigned int i) const { return X[i]; } |
/scratch_na/users/xoserete/qaas_runs/171-284-5201/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/TinyVector.h: 61 - 61 |
-------------------------------------------------------------------------------- |
61: for (size_t d = 0; d < D; ++d) |
/scratch_na/users/xoserete/qaas_runs/171-284-5201/intel/miniqmc/build/miniqmc/src/Numerics/Spline2/MultiBsplineEvalHelper.hpp: 45 - 47 |
-------------------------------------------------------------------------------- |
45: T sf = std::floor(x); |
46: T dx2 = x - sf; |
47: int ind2 = std::min(std::max(0, static_cast<int>(sf)), nmax); |
/scratch_na/users/xoserete/qaas_runs/171-284-5201/intel/miniqmc/build/miniqmc/src/Particle/Lattice/CrystalLattice.h: 170 - 170 |
-------------------------------------------------------------------------------- |
170: if (-std::numeric_limits<T1>::epsilon() < val_dot[i] && val_dot[i] < 0) |
/scratch_na/users/xoserete/qaas_runs/171-284-5201/intel/miniqmc/build/miniqmc/src/Numerics/Spline2/MultiBsplineRef.hpp: 42 - 71 |
-------------------------------------------------------------------------------- |
42: x -= spline_m->x_grid.start; |
43: y -= spline_m->y_grid.start; |
44: z -= spline_m->z_grid.start; |
45: T tx, ty, tz; |
46: int ix, iy, iz; |
47: spline2::getSplineBound(x * spline_m->x_grid.delta_inv, tx, ix, spline_m->x_grid.num - 1); |
48: spline2::getSplineBound(y * spline_m->y_grid.delta_inv, ty, iy, spline_m->y_grid.num - 1); |
49: spline2::getSplineBound(z * spline_m->z_grid.delta_inv, tz, iz, spline_m->z_grid.num - 1); |
[...] |
56: const intptr_t xs = spline_m->x_stride; |
57: const intptr_t ys = spline_m->y_stride; |
58: const intptr_t zs = spline_m->z_stride; |
59: |
60: constexpr T zero(0); |
61: std::fill(vals, vals + num_splines, zero); |
62: |
63: for (size_t i = 0; i < 4; i++) |
64: for (size_t j = 0; j < 4; j++) |
65: { |
66: const T pre00 = a[i] * b[j]; |
67: const T* restrict coefs = spline_m->coefs + (ix + i) * xs + (iy + j) * ys + iz * zs; |
68: for (size_t n = 0; n < num_splines; n++) |
69: vals[n] += pre00 * |
70: (c[0] * coefs[n] + c[1] * coefs[n + zs] + c[2] * coefs[n + 2 * zs] + |
71: c[3] * coefs[n + 3 * zs]); |
/usr/lib/gcc/x86_64-redhat-linux/8/../../../../include/c++/8/bits/stl_algobase.h: 200 - 696 |
-------------------------------------------------------------------------------- |
200: if (__b < __a) |
[...] |
366: const ptrdiff_t _Num = __last - __first; |
367: if (_Num) |
368: __builtin_memmove(__result, __first, sizeof(_Tp) * _Num); |
[...] |
695: for (; __first != __last; ++__first) |
696: *__first = __tmp; |
/scratch_na/users/xoserete/qaas_runs/171-284-5201/intel/miniqmc/build/miniqmc/src/Particle/ParticleSet.h: 143 - 143 |
-------------------------------------------------------------------------------- |
143: return (active_ptcl_ == iat) ? active_pos_ : R[iat]; |
/scratch_na/users/xoserete/qaas_runs/171-284-5201/intel/miniqmc/build/miniqmc/src/QMCWaveFunctions/einspline_spo_ref.hpp: 172 - 189 |
-------------------------------------------------------------------------------- |
172: ScopedTimer local_timer(timer); |
173: |
174: auto u = Lattice.toUnit_floor(P.activeR(iat)); |
175: for (int i = 0; i < nBlocks; ++i) |
176: MultiBsplineEvalRef::evaluate_v(einsplines[i], u[0], u[1], u[2], psi[i].data(), nSplinesPerBlock); |
177: } |
178: |
179: inline void evaluate(const ParticleSet& P, int iat, ValueVector_t& psi_v) |
180: { |
181: evaluate_v(P, iat); |
182: |
183: for (int i = 0; i < nBlocks; ++i) |
184: { |
185: // in real simulation, phase needs to be applied. Here just fake computation |
186: const int first = i * nBlocks; |
187: std::copy_n(psi[i].data(), std::min((i + 1) * nSplinesPerBlock, OrbitalSetSize) - first, psi_v.data() + first); |
188: } |
189: } |
/usr/lib/gcc/x86_64-redhat-linux/8/../../../../include/c++/8/bits/stl_vector.h: 933 - 1056 |
-------------------------------------------------------------------------------- |
933: return *(this->_M_impl._M_start + __n); |
[...] |
1056: { return _M_data_ptr(this->_M_impl._M_start); } |
/scratch_na/users/xoserete/qaas_runs/171-284-5201/intel/miniqmc/build/miniqmc/src/Numerics/Spline2/MultiBsplineData.hpp: 54 - 57 |
-------------------------------------------------------------------------------- |
54: a[0] = ((A00 * tx + A01) * tx + A02) * tx + A03; |
55: a[1] = ((A10 * tx + A11) * tx + A12) * tx + A13; |
56: a[2] = ((A20 * tx + A21) * tx + A22) * tx + A23; |
57: a[3] = ((A30 * tx + A31) * tx + A32) * tx + A33; |
/scratch_na/users/xoserete/qaas_runs/171-284-5201/intel/miniqmc/build/miniqmc/src/Utilities/NewTimer.h: 242 - 249 |
-------------------------------------------------------------------------------- |
242: ScopeGuard(TIMER& t) : timer(t) { timer.start(); } |
[...] |
249: ~ScopeGuard() { timer.stop(); } |
/usr/lib/gcc/x86_64-redhat-linux/8/../../../../include/c++/8/bits/stl_algo.h: 782 - 782 |
-------------------------------------------------------------------------------- |
782: { return std::copy(__first, __first + __n, __result); } |
0x434320 PUSH %RBP |
0x434321 MOV %RSP,%RBP |
0x434324 PUSH %R15 |
0x434326 PUSH %R14 |
0x434328 PUSH %R13 |
0x43432a PUSH %R12 |
0x43432c PUSH %RBX |
0x43432d SUB $0x148,%RSP |
0x434334 MOV %RCX,%RBX |
0x434337 MOV %EDX,%R12D |
0x43433a MOV %RSI,%R13 |
0x43433d MOV %RDI,%R14 |
0x434340 MOV 0x358(%RDI),%RDI |
0x434347 MOV %RDI,-0xb0(%RBP) |
0x43434e CALL 47f980 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE5startEv> |
0x434353 LEA 0x128(%R13),%RAX |
0x43435a MOVSXD %R12D,%RCX |
0x43435d LEA (%RCX,%RCX,2),%RDX |
0x434361 SAL $0x3,%RDX |
0x434365 ADD 0x40(%R13),%RDX |
0x434369 CMP %ECX,0x124(%R13) |
0x434370 CMOVE %RAX,%RDX |
0x434374 VMOVSD 0xf0(%R14),%XMM0 |
0x43437d VMOVUPD (%RDX),%XMM1 |
0x434381 VMULSD 0xd8(%R14),%XMM1,%XMM3 |
0x43438a VMOVUPD 0xe0(%R14),%XMM2 |
0x434393 VFMADD231SD 0x8(%RDX),%XMM0,%XMM3 |
0x434399 VMOVDDUP 0x10(%RDX),%XMM4 |
0x43439e VFMADD231SD 0x108(%R14),%XMM4,%XMM3 |
0x4343a7 VMOVSD 0xf8(%R14),%XMM0 |
0x4343b0 VSHUFPD $0x1,%XMM1,%XMM1,%XMM5 |
0x4343b5 VUNPCKLPD 0xe8(%R14),%XMM0,%XMM6 |
0x4343be VMOVHPD 0x100(%R14),%XMM2,%XMM0 |
0x4343c7 VMULPD %XMM5,%XMM6,%XMM2 |
0x4343cb VFMADD213PD %XMM2,%XMM1,%XMM0 |
0x4343d0 VFMADD231PD 0x110(%R14),%XMM4,%XMM0 |
0x4343d9 VXORPD %XMM1,%XMM1,%XMM1 |
0x4343dd VCMPPD $0x1,%XMM1,%XMM3,%XMM4 |
0x4343e2 VMOVSD 0xb9abe(%RIP),%XMM2 |
0x4343ea VCMPPD $0x1,%XMM3,%XMM2,%XMM5 |
0x4343ef VANDPD %XMM4,%XMM5,%XMM4 |
0x4343f3 VMOVD %XMM4,%EAX |
0x4343f7 VXORPD %XMM4,%XMM4,%XMM4 |
0x4343fb VMOVUPD %XMM4,-0x110(%RBP) |
0x434403 TEST $0x1,%AL |
0x434405 JNE 434419 |
0x434407 VROUNDSD $0x9,%XMM3,%XMM3,%XMM4 |
0x43440d VSUBSD %XMM4,%XMM3,%XMM3 |
0x434411 VMOVUPD %XMM3,-0x110(%RBP) |
0x434419 VXORPD %XMM3,%XMM3,%XMM3 |
0x43441d VCMPPD $0x1,%XMM3,%XMM0,%XMM3 |
0x434422 VMOVMSKPD %XMM3,%EAX |
0x434426 VROUNDSD $0x9,%XMM0,%XMM0,%XMM3 |
0x43442c VSUBSD %XMM3,%XMM0,%XMM3 |
0x434430 VXORPD %XMM4,%XMM4,%XMM4 |
0x434434 TEST $0x1,%AL |
0x434436 JNE 43443c |
0x434438 VMOVAPD %XMM3,%XMM4 |
0x43443c MOV %RBX,-0xa8(%RBP) |
0x434443 VSHUFPD $0x1,%XMM0,%XMM0,%XMM5 |
0x434448 VROUNDSD $0x9,%XMM5,%XMM5,%XMM6 |
0x43444e VSUBSD %XMM6,%XMM5,%XMM6 |
0x434452 TEST $0x2,%AL |
0x434454 JNE 43445a |
0x434456 VMOVAPD %XMM6,%XMM1 |
0x43445a MOV %R14,-0x68(%RBP) |
0x43445e MOV 0x30(%R14),%EAX |
0x434462 MOV %RAX,-0x70(%RBP) |
0x434466 TEST %EAX,%EAX |
0x434468 JLE 4349cb |
0x43446e VCMPSD $0x1,%XMM0,%XMM2,%XMM0 |
0x434473 VBLENDVPD %XMM0,%XMM4,%XMM3,%XMM0 |
0x434479 VMOVUPD %XMM0,-0x130(%RBP) |
0x434481 VCMPSD $0x1,%XMM5,%XMM2,%XMM0 |
0x434486 VBLENDVPD %XMM0,%XMM1,%XMM6,%XMM0 |
0x43448c VMOVUPD %XMM0,-0x120(%RBP) |
0x434494 MOV -0x68(%RBP),%RAX |
0x434498 MOV 0x2f8(%RAX),%RCX |
0x43449f MOV %RCX,-0xc8(%RBP) |
0x4344a6 MOV 0x310(%RAX),%RCX |
0x4344ad MOV %RCX,-0xc0(%RBP) |
0x4344b4 MOVSXD 0x40(%RAX),%R12 |
0x4344b8 LEA (,%R12,8),%RAX |
0x4344c0 MOV %RAX,-0xb8(%RBP) |
0x4344c7 CMP $0x1,%R12 |
0x4344cb MOV %R12,%R15 |
0x4344ce ADC $0,%R15 |
0x4344d2 DECQ -0x70(%RBP) |
0x4344d6 MOV %R15,%RAX |
0x4344d9 SHR $0x1,%RAX |
0x4344dc MOV %RAX,-0xf0(%RBP) |
0x4344e3 MOV %R15,-0xf8(%RBP) |
0x4344ea AND $-0x2,%R15 |
0x4344ee XOR %ECX,%ECX |
0x4344f0 VMOVSD 0xb99b8(%RIP),%XMM10 |
0x4344f8 VMOVSD 0xb6958(%RIP),%XMM11 |
0x434500 JMP 434528 |
0x434502 NOPW %CS:(%RAX,%RAX,1) |
(745) 0x434510 MOV -0xe8(%RBP),%RCX |
(745) 0x434517 LEA 0x1(%RCX),%RAX |
(745) 0x43451b CMP -0x70(%RBP),%RCX |
(745) 0x43451f MOV %RAX,%RCX |
(745) 0x434522 JE 4349cb |
(745) 0x434528 MOV -0xc8(%RBP),%RAX |
(745) 0x43452f MOV (%RAX,%RCX,8),%R14 |
(745) 0x434533 MOV %RCX,-0xe8(%RBP) |
(745) 0x43453a LEA (%RCX,%RCX,2),%RAX |
(745) 0x43453e MOV -0xc0(%RBP),%RCX |
(745) 0x434545 MOV (%RCX,%RAX,8),%R13 |
(745) 0x434549 VMOVUPD -0x110(%RBP),%XMM0 |
(745) 0x434551 VSUBSD 0x28(%R14),%XMM0,%XMM0 |
(745) 0x434557 VMOVUPD -0x130(%RBP),%XMM1 |
(745) 0x43455f VSUBSD 0x50(%R14),%XMM1,%XMM1 |
(745) 0x434565 VMOVUPD -0x120(%RBP),%XMM2 |
(745) 0x43456d VSUBSD 0x78(%R14),%XMM2,%XMM2 |
(745) 0x434573 VMULSD 0x48(%R14),%XMM0,%XMM0 |
(745) 0x434579 MOVSXD 0x38(%R14),%R8 |
(745) 0x43457d VROUNDSD $0x9,%XMM0,%XMM0,%XMM14 |
(745) 0x434583 VMULSD 0x70(%R14),%XMM1,%XMM1 |
(745) 0x434589 VSUBSD %XMM14,%XMM0,%XMM3 |
(745) 0x43458e MOVSXD 0x60(%R14),%R9 |
(745) 0x434592 VMULSD %XMM3,%XMM10,%XMM4 |
(745) 0x434596 VSUBSD %XMM4,%XMM11,%XMM0 |
(745) 0x43459a VMULSD %XMM3,%XMM3,%XMM5 |
(745) 0x43459e VMOVDDUP %XMM3,%XMM6 |
(745) 0x4345a2 VMOVDDUP 0xb68ae(%RIP),%XMM8 |
(745) 0x4345aa VBLENDPD $0x1,%XMM0,%XMM8,%XMM0 |
(745) 0x4345b0 VMULPD %XMM0,%XMM6,%XMM7 |
(745) 0x4345b4 VMOVUPD 0xb9844(%RIP),%XMM9 |
(745) 0x4345bc VADDPD %XMM7,%XMM9,%XMM0 |
(745) 0x4345c0 VUNPCKLPD %XMM5,%XMM6,%XMM6 |
(745) 0x4345c4 VMOVUPD 0xb9844(%RIP),%XMM12 |
(745) 0x4345cc VFMADD213PD %XMM12,%XMM0,%XMM6 |
(745) 0x4345d1 VROUNDSD $0x9,%XMM1,%XMM1,%XMM15 |
(745) 0x4345d7 VSUBSD %XMM15,%XMM1,%XMM1 |
(745) 0x4345dc VMULSD 0x98(%R14),%XMM2,%XMM0 |
(745) 0x4345e5 MOVSXD 0x88(%R14),%RBX |
(745) 0x4345ec VMOVUPD %XMM6,-0x170(%RBP) |
(745) 0x4345f4 VSHUFPD $0x1,%XMM7,%XMM7,%XMM2 |
(745) 0x4345f9 VSUBSD %XMM2,%XMM11,%XMM2 |
(745) 0x4345fd VFMADD213SD %XMM11,%XMM3,%XMM2 |
(745) 0x434602 VFMADD213SD %XMM10,%XMM3,%XMM2 |
(745) 0x434607 VMOVSD %XMM2,-0x160(%RBP) |
(745) 0x43460f VMULSD %XMM5,%XMM4,%XMM2 |
(745) 0x434613 VMOVSD %XMM2,-0x158(%RBP) |
(745) 0x43461b VMULSD %XMM1,%XMM10,%XMM2 |
(745) 0x43461f VSUBSD %XMM2,%XMM11,%XMM3 |
(745) 0x434623 VMULSD %XMM1,%XMM1,%XMM4 |
(745) 0x434627 VMOVDDUP %XMM1,%XMM5 |
(745) 0x43462b VBLENDPD $0x1,%XMM3,%XMM8,%XMM3 |
(745) 0x434631 VMULPD %XMM3,%XMM5,%XMM3 |
(745) 0x434635 VADDPD %XMM3,%XMM9,%XMM6 |
(745) 0x434639 VUNPCKLPD %XMM4,%XMM5,%XMM5 |
(745) 0x43463d VFMADD213PD %XMM12,%XMM6,%XMM5 |
(745) 0x434642 VMOVUPD %XMM5,-0x150(%RBP) |
(745) 0x43464a VSHUFPD $0x1,%XMM3,%XMM3,%XMM3 |
(745) 0x43464f VSUBSD %XMM3,%XMM11,%XMM3 |
(745) 0x434653 VFMADD213SD %XMM11,%XMM1,%XMM3 |
(745) 0x434658 VFMADD213SD %XMM10,%XMM1,%XMM3 |
(745) 0x43465d VMOVSD %XMM3,-0x140(%RBP) |
(745) 0x434665 VROUNDSD $0x9,%XMM0,%XMM0,%XMM3 |
(745) 0x43466b VMULSD %XMM4,%XMM2,%XMM1 |
(745) 0x43466f VMOVSD %XMM1,-0x138(%RBP) |
(745) 0x434677 VSUBSD %XMM3,%XMM0,%XMM2 |
(745) 0x43467b VMULSD %XMM2,%XMM10,%XMM1 |
(745) 0x43467f VSUBSD %XMM1,%XMM11,%XMM12 |
(745) 0x434683 VFMADD213SD 0xb67ec(%RIP),%XMM2,%XMM12 |
(745) 0x43468c VMULSD %XMM2,%XMM11,%XMM0 |
(745) 0x434690 VADDSD 0xb9820(%RIP),%XMM0,%XMM13 |
(745) 0x434698 VMULSD %XMM2,%XMM2,%XMM4 |
(745) 0x43469c VFMADD213SD 0xb981b(%RIP),%XMM4,%XMM13 |
(745) 0x4346a5 MOV 0x10(%R14),%RAX |
(745) 0x4346a9 MOV %RAX,-0x78(%RBP) |
(745) 0x4346ad MOV 0x18(%R14),%RAX |
(745) 0x4346b1 MOV %RAX,-0x80(%RBP) |
(745) 0x4346b5 MOV 0x20(%R14),%R10 |
(745) 0x4346b9 MOV $0x1fffffffffffffff,%RAX |
(745) 0x4346c3 TEST %RAX,%R12 |
(745) 0x4346c6 JE 43477d |
(745) 0x4346cc MOV %R13,%RDI |
(745) 0x4346cf XOR %ESI,%ESI |
(745) 0x4346d1 MOV -0xb8(%RBP),%RDX |
(745) 0x4346d8 VMOVUPD %XMM12,-0xa0(%RBP) |
(745) 0x4346e0 VMOVUPD %XMM13,-0x90(%RBP) |
(745) 0x4346e8 MOV %R8,-0x60(%RBP) |
(745) 0x4346ec MOV %R9,-0x58(%RBP) |
(745) 0x4346f0 MOV %R10,-0x50(%RBP) |
(745) 0x4346f4 VMOVSD %XMM14,-0x48(%RBP) |
(745) 0x4346f9 VMOVSD %XMM15,-0x40(%RBP) |
(745) 0x4346fe VMOVSD %XMM1,-0x38(%RBP) |
(745) 0x434703 VMOVSD %XMM2,-0x30(%RBP) |
(745) 0x434708 VMOVSD %XMM3,-0xe0(%RBP) |
(745) 0x434710 VMOVSD %XMM4,-0xd8(%RBP) |
(745) 0x434718 VMOVSD %XMM0,-0xd0(%RBP) |
(745) 0x434720 CALL 4dc0f0 <_intel_fast_memset> |
(745) 0x434725 VMOVSD -0xd0(%RBP),%XMM0 |
(745) 0x43472d VMOVSD -0xd8(%RBP),%XMM4 |
(745) 0x434735 VMOVSD -0xe0(%RBP),%XMM3 |
(745) 0x43473d VMOVSD -0x30(%RBP),%XMM2 |
(745) 0x434742 VMOVSD -0x38(%RBP),%XMM1 |
(745) 0x434747 VMOVSD -0x40(%RBP),%XMM15 |
(745) 0x43474c VMOVSD -0x48(%RBP),%XMM14 |
(745) 0x434751 MOV -0x50(%RBP),%R10 |
(745) 0x434755 MOV -0x58(%RBP),%R9 |
(745) 0x434759 MOV -0x60(%RBP),%R8 |
(745) 0x43475d VMOVUPD -0x90(%RBP),%XMM13 |
(745) 0x434765 VMOVUPD -0xa0(%RBP),%XMM12 |
(745) 0x43476d VMOVSD 0xb66e3(%RIP),%XMM11 |
(745) 0x434775 VMOVSD 0xb9733(%RIP),%XMM10 |
(745) 0x43477d VCVTTSD2SI %XMM14,%EDX |
(745) 0x434782 VCVTTSD2SI %XMM15,%ESI |
(745) 0x434787 VCVTTSD2SI %XMM3,%ECX |
(745) 0x43478b VFMADD213SD %XMM10,%XMM2,%XMM12 |
(745) 0x434790 VSUBSD %XMM0,%XMM11,%XMM0 |
(745) 0x434794 VFMADD213SD %XMM11,%XMM2,%XMM0 |
(745) 0x434799 VFMADD213SD %XMM10,%XMM2,%XMM0 |
(745) 0x43479e MOV 0x8(%R14),%RAX |
(745) 0x4347a2 VMULSD %XMM4,%XMM1,%XMM1 |
(745) 0x4347a6 DEC %RBX |
(745) 0x4347a9 MOV %ECX,%EDI |
(745) 0x4347ab SAR $0x1f,%EDI |
(745) 0x4347ae ANDN %ECX,%EDI,%ECX |
(745) 0x4347b3 CMP %RCX,%RBX |
(745) 0x4347b6 CMOVGE %RCX,%RBX |
(745) 0x4347ba MOV %R10,%RCX |
(745) 0x4347bd IMUL %RBX,%RCX |
(745) 0x4347c1 DEC %R9 |
(745) 0x4347c4 MOV %ESI,%EDI |
(745) 0x4347c6 SAR $0x1f,%EDI |
(745) 0x4347c9 ANDN %ESI,%EDI,%ESI |
(745) 0x4347ce CMP %RSI,%R9 |
(745) 0x4347d1 CMOVGE %RSI,%R9 |
(745) 0x4347d5 MOV -0x80(%RBP),%RDI |
(745) 0x4347d9 IMUL %RDI,%R9 |
(745) 0x4347dd DEC %R8 |
(745) 0x4347e0 MOV %EDX,%ESI |
(745) 0x4347e2 SAR $0x1f,%ESI |
(745) 0x4347e5 ANDN %EDX,%ESI,%EDX |
(745) 0x4347ea CMP %RDX,%R8 |
(745) 0x4347ed CMOVGE %RDX,%R8 |
(745) 0x4347f1 MOV -0x78(%RBP),%RSI |
(745) 0x4347f5 IMUL %RSI,%R8 |
(745) 0x4347f9 ADD %R9,%R8 |
(745) 0x4347fc MOV %RBX,%R11 |
(745) 0x4347ff LEA (%R10,%R10,2),%RBX |
(745) 0x434803 VMOVDDUP %XMM13,%XMM2 |
(745) 0x434808 VMOVDDUP %XMM12,%XMM3 |
(745) 0x43480d VMOVDDUP %XMM0,%XMM4 |
(745) 0x434811 VMOVDDUP %XMM1,%XMM5 |
(745) 0x434815 LEA (%R8,%RCX,1),%RDX |
(745) 0x434819 LEA (%RDX,%R15,1),%R9 |
(745) 0x43481d MOV %R9,-0xa0(%RBP) |
(745) 0x434824 LEA (%R10,%RCX,1),%R9 |
(745) 0x434828 ADD %R8,%R9 |
(745) 0x43482b ADD %R15,%R9 |
(745) 0x43482e MOV %R9,-0x90(%RBP) |
(745) 0x434835 LEA (%RCX,%R10,2),%R9 |
(745) 0x434839 ADD %R8,%R9 |
(745) 0x43483c ADD %R15,%R9 |
(745) 0x43483f MOV %R9,-0x60(%RBP) |
(745) 0x434843 ADD %RCX,%RBX |
(745) 0x434846 ADD %R8,%RBX |
(745) 0x434849 ADD %R15,%RBX |
(745) 0x43484c LEA 0x3(%R11),%RCX |
(745) 0x434850 IMUL %R10,%RCX |
(745) 0x434854 ADD %R8,%RCX |
(745) 0x434857 LEA (%RAX,%RCX,8),%R9 |
(745) 0x43485b LEA (,%RSI,8),%RCX |
(745) 0x434863 MOV %RCX,-0x30(%RBP) |
(745) 0x434867 LEA 0x2(%R11),%RCX |
(745) 0x43486b IMUL %R10,%RCX |
(745) 0x43486f INC %R11 |
(745) 0x434872 IMUL %R10,%R11 |
(745) 0x434876 LEA (,%RDI,8),%R10 |
(745) 0x43487e ADD %R8,%RCX |
(745) 0x434881 LEA (%RAX,%RCX,8),%R14 |
(745) 0x434885 ADD %R8,%R11 |
(745) 0x434888 LEA (%RAX,%R11,8),%RCX |
(745) 0x43488c LEA (%RAX,%RDX,8),%R8 |
(745) 0x434890 MOV %R9,%RDX |
(745) 0x434893 MOV %RCX,%R9 |
(745) 0x434896 XOR %ECX,%ECX |
(745) 0x434898 JMP 4348d2 |
0x43489a NOPW (%RAX,%RAX,1) |
(746) 0x4348a0 MOV -0x38(%RBP),%RDI |
(746) 0x4348a4 LEA 0x1(%RDI),%RCX |
(746) 0x4348a8 MOV -0x58(%RBP),%RDX |
(746) 0x4348ac MOV -0x30(%RBP),%RSI |
(746) 0x4348b0 ADD %RSI,%RDX |
(746) 0x4348b3 MOV -0x50(%RBP),%R14 |
(746) 0x4348b7 ADD %RSI,%R14 |
(746) 0x4348ba MOV -0x48(%RBP),%R9 |
(746) 0x4348be ADD %RSI,%R9 |
(746) 0x4348c1 MOV -0x40(%RBP),%R8 |
(746) 0x4348c5 ADD %RSI,%R8 |
(746) 0x4348c8 CMP $0x3,%RDI |
(746) 0x4348cc JE 434510 |
(746) 0x4348d2 VMOVSD -0x170(%RBP,%RCX,8),%XMM6 |
(746) 0x4348db MOV -0x78(%RBP),%R11 |
(746) 0x4348df MOV %RCX,-0x38(%RBP) |
(746) 0x4348e3 IMUL %RCX,%R11 |
(746) 0x4348e7 MOV %R8,-0x40(%RBP) |
(746) 0x4348eb MOV %R9,-0x48(%RBP) |
(746) 0x4348ef MOV %R14,-0x50(%RBP) |
(746) 0x4348f3 MOV %RDX,-0x58(%RBP) |
(746) 0x4348f7 MOV %RDX,%RCX |
(746) 0x4348fa XOR %EDX,%EDX |
(746) 0x4348fc JMP 434919 |
0x4348fe XCHG %AX,%AX |
(747) 0x434900 LEA 0x1(%RDX),%RSI |
(747) 0x434904 ADD %R10,%RCX |
(747) 0x434907 ADD %R10,%R14 |
(747) 0x43490a ADD %R10,%R9 |
(747) 0x43490d ADD %R10,%R8 |
(747) 0x434910 CMP $0x3,%RDX |
(747) 0x434914 MOV %RSI,%RDX |
(747) 0x434917 JE 4348a0 |
(747) 0x434919 TEST %R12D,%R12D |
(747) 0x43491c JE 434900 |
(747) 0x43491e VMULSD -0x150(%RBP,%RDX,8),%XMM6,%XMM7 |
(747) 0x434927 CMP $0x1,%R12D |
(747) 0x43492b JE 43496f |
(747) 0x43492d VMOVDDUP %XMM7,%XMM8 |
(747) 0x434931 MOV -0xf0(%RBP),%RSI |
(747) 0x434938 XOR %EDI,%EDI |
(747) 0x43493a NOPW (%RAX,%RAX,1) |
(748) 0x434940 VMULPD (%R8,%RDI,1),%XMM3,%XMM9 |
(748) 0x434946 VFMADD231PD (%R9,%RDI,1),%XMM2,%XMM9 |
(748) 0x43494c VFMADD231PD (%R14,%RDI,1),%XMM4,%XMM9 |
(748) 0x434952 VFMADD231PD (%RCX,%RDI,1),%XMM5,%XMM9 |
(748) 0x434958 VFMADD213PD (%R13,%RDI,1),%XMM8,%XMM9 |
(748) 0x43495f VMOVUPD %XMM9,(%R13,%RDI,1) |
(748) 0x434966 ADD $0x10,%RDI |
(748) 0x43496a DEC %RSI |
(748) 0x43496d JNE 434940 |
(747) 0x43496f CMP -0xf8(%RBP),%R15 |
(747) 0x434976 JE 434900 |
(747) 0x434978 MOV -0x80(%RBP),%RSI |
(747) 0x43497c IMUL %RDX,%RSI |
(747) 0x434980 ADD %R11,%RSI |
(747) 0x434983 MOV -0xa0(%RBP),%RDI |
(747) 0x43498a ADD %RSI,%RDI |
(747) 0x43498d VMULSD (%RAX,%RDI,8),%XMM12,%XMM8 |
(747) 0x434992 MOV -0x90(%RBP),%RDI |
(747) 0x434999 ADD %RSI,%RDI |
(747) 0x43499c VFMADD231SD (%RAX,%RDI,8),%XMM13,%XMM8 |
(747) 0x4349a2 MOV -0x60(%RBP),%RDI |
(747) 0x4349a6 ADD %RSI,%RDI |
(747) 0x4349a9 VFMADD231SD (%RAX,%RDI,8),%XMM0,%XMM8 |
(747) 0x4349af ADD %RBX,%RSI |
(747) 0x4349b2 VFMADD231SD (%RAX,%RSI,8),%XMM1,%XMM8 |
(747) 0x4349b8 VFMADD213SD (%R13,%R15,8),%XMM7,%XMM8 |
(747) 0x4349bf VMOVSD %XMM8,(%R13,%R15,8) |
(747) 0x4349c6 JMP 434900 |
0x4349cb MOV -0xb0(%RBP),%RDI |
0x4349d2 CALL 47fb70 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE4stopEv> |
0x4349d7 MOV -0x68(%RBP),%R12 |
0x4349db MOV 0x30(%R12),%EAX |
0x4349e0 TEST %EAX,%EAX |
0x4349e2 MOV -0xa8(%RBP),%R15 |
0x4349e9 JLE 434a63 |
0x4349eb XOR %EBX,%EBX |
0x4349ed XOR %R14D,%R14D |
0x4349f0 JMP 434a0c |
0x4349f2 NOPW %CS:(%RAX,%RAX,1) |
(744) 0x434a00 MOVSXD %EAX,%RCX |
(744) 0x434a03 ADD $0x18,%RBX |
(744) 0x434a07 CMP %RCX,%R14 |
(744) 0x434a0a JGE 434a63 |
(744) 0x434a0c MOV %R14D,%ECX |
(744) 0x434a0f IMUL %EAX,%ECX |
(744) 0x434a12 INC %R14 |
(744) 0x434a15 MOV 0x40(%R12),%EDX |
(744) 0x434a1a IMUL %R14D,%EDX |
(744) 0x434a1e MOV 0x8(%R12),%ESI |
(744) 0x434a23 CMP %EDX,%ESI |
(744) 0x434a25 CMOVL %ESI,%EDX |
(744) 0x434a28 SUB %ECX,%EDX |
(744) 0x434a2a MOVSXD %EDX,%RDX |
(744) 0x434a2d MOV $0x1fffffffffffffff,%RSI |
(744) 0x434a37 TEST %RSI,%RDX |
(744) 0x434a3a JE 434a00 |
(744) 0x434a3c MOV 0x310(%R12),%RAX |
(744) 0x434a44 MOV (%RAX,%RBX,1),%RSI |
(744) 0x434a48 SAL $0x3,%RDX |
(744) 0x434a4c MOVSXD %ECX,%RDI |
(744) 0x434a4f SAL $0x3,%RDI |
(744) 0x434a53 ADD 0x18(%R15),%RDI |
(744) 0x434a57 CALL 4037b0 <memmove@plt> |
(744) 0x434a5c MOV 0x30(%R12),%EAX |
(744) 0x434a61 JMP 434a00 |
0x434a63 ADD $0x148,%RSP |
0x434a6a POP %RBX |
0x434a6b POP %R12 |
0x434a6d POP %R13 |
0x434a6f POP %R14 |
0x434a71 POP %R15 |
0x434a73 POP %RBP |
0x434a74 RET |
0x434a75 MOV %RAX,%RDI |
0x434a78 CALL 409f50 <__clang_call_terminate> |
0x434a7d NOPL (%RAX) |
Path / |
Source file and lines | einspline_spo_ref.hpp:172-189 |
Module | exec |
nb instructions | 122 |
nb uops | 134 |
loop length | 586 |
used x86 registers | 12 |
used mmx registers | 0 |
used xmm registers | 9 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 12 |
ADD-SUB / MUL ratio | 1.50 |
micro-operation queue | 22.33 cycles |
front end | 22.33 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 11.20 | 11.17 | 11.33 | 11.33 | 11.50 | 11.23 | 11.20 | 11.50 | 11.50 | 11.50 | 11.20 | 11.33 |
cycles | 11.20 | 11.17 | 11.33 | 11.33 | 11.50 | 11.23 | 11.20 | 11.50 | 11.50 | 11.50 | 11.20 | 11.33 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 22.37-22.44 |
Stall cycles | 0.00 |
Front-end | 22.33 |
Dispatch | 11.50 |
Overall L1 | 22.33 |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 55% |
load | 21% |
store | 100% |
mul | 50% |
add-sub | 0% |
fma | 50% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 71% |
all | 37% |
load | 17% |
store | 28% |
mul | 50% |
add-sub | 0% |
fma | 50% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 50% |
all | 11% |
load | 10% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 9% |
all | 19% |
load | 15% |
store | 25% |
mul | 18% |
add-sub | 12% |
fma | 18% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 21% |
all | 16% |
load | 14% |
store | 16% |
mul | 18% |
add-sub | 12% |
fma | 18% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 17% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
SUB $0x148,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RCX,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %EDX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RSI,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDI,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x358(%RDI),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RDI,-0xb0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 47f980 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE5startEv> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
LEA 0x128(%R13),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOVSXD %R12D,%RCX | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
LEA (%RCX,%RCX,2),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SAL $0x3,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
ADD 0x40(%R13),%RDX | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
CMP %ECX,0x124(%R13) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
CMOVE %RAX,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0xf0(%R14),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVUPD (%RDX),%XMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMULSD 0xd8(%R14),%XMM1,%XMM3 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMOVUPD 0xe0(%R14),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VFMADD231SD 0x8(%RDX),%XMM0,%XMM3 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMOVDDUP 0x10(%RDX),%XMM4 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VFMADD231SD 0x108(%R14),%XMM4,%XMM3 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMOVSD 0xf8(%R14),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VSHUFPD $0x1,%XMM1,%XMM1,%XMM5 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VUNPCKLPD 0xe8(%R14),%XMM0,%XMM6 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 1 |
VMOVHPD 0x100(%R14),%XMM2,%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4-12 | 1 |
VMULPD %XMM5,%XMM6,%XMM2 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD %XMM2,%XMM1,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD 0x110(%R14),%XMM4,%XMM0 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VXORPD %XMM1,%XMM1,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VCMPPD $0x1,%XMM1,%XMM3,%XMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVSD 0xb9abe(%RIP),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VCMPPD $0x1,%XMM3,%XMM2,%XMM5 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VANDPD %XMM4,%XMM5,%XMM4 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VMOVD %XMM4,%EAX | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VXORPD %XMM4,%XMM4,%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVUPD %XMM4,-0x110(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
TEST $0x1,%AL | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JNE 434419 <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEE+0xf9> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VROUNDSD $0x9,%XMM3,%XMM3,%XMM4 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 1 |
VSUBSD %XMM4,%XMM3,%XMM3 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVUPD %XMM3,-0x110(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VXORPD %XMM3,%XMM3,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VCMPPD $0x1,%XMM3,%XMM0,%XMM3 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVMSKPD %XMM3,%EAX | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VROUNDSD $0x9,%XMM0,%XMM0,%XMM3 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 1 |
VSUBSD %XMM3,%XMM0,%XMM3 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VXORPD %XMM4,%XMM4,%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
TEST $0x1,%AL | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JNE 43443c <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEE+0x11c> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VMOVAPD %XMM3,%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
MOV %RBX,-0xa8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VSHUFPD $0x1,%XMM0,%XMM0,%XMM5 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VROUNDSD $0x9,%XMM5,%XMM5,%XMM6 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 1 |
VSUBSD %XMM6,%XMM5,%XMM6 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
TEST $0x2,%AL | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JNE 43445a <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEE+0x13a> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VMOVAPD %XMM6,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
MOV %R14,-0x68(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x30(%R14),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x70(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
TEST %EAX,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 4349cb <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEE+0x6ab> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VCMPSD $0x1,%XMM0,%XMM2,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VBLENDVPD %XMM0,%XMM4,%XMM3,%XMM0 | 3 | 1 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 2-3 | 1 |
VMOVUPD %XMM0,-0x130(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VCMPSD $0x1,%XMM5,%XMM2,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VBLENDVPD %XMM0,%XMM1,%XMM6,%XMM0 | 3 | 1 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 2-3 | 1 |
VMOVUPD %XMM0,-0x120(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
MOV -0x68(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x2f8(%RAX),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,-0xc8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x310(%RAX),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,-0xc0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVSXD 0x40(%RAX),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (,%R12,8),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RAX,-0xb8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP $0x1,%R12 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R12,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
ADC $0,%R15 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
DECQ -0x70(%RBP) | 3 | 0.20 | 0.20 | 0.33 | 0.33 | 0.50 | 0.20 | 0.20 | 0.50 | 0.50 | 0.50 | 0.20 | 0.33 | 1 | 0.50 |
MOV %R15,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SHR $0x1,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV %RAX,-0xf0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R15,-0xf8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
AND $-0x2,%R15 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVSD 0xb99b8(%RIP),%XMM10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0xb6958(%RIP),%XMM11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JMP 434528 <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEE+0x208> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0xb0(%RBP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CALL 47fb70 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE4stopEv> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV -0x68(%RBP),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x30(%R12),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
TEST %EAX,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
MOV -0xa8(%RBP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JLE 434a63 <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEE+0x743> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
XOR %EBX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %R14D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 434a0c <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEE+0x6ec> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD $0x148,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
MOV %RAX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 409f50 <__clang_call_terminate> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | einspline_spo_ref.hpp:172-189 |
Module | exec |
nb instructions | 122 |
nb uops | 134 |
loop length | 586 |
used x86 registers | 12 |
used mmx registers | 0 |
used xmm registers | 9 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 12 |
ADD-SUB / MUL ratio | 1.50 |
micro-operation queue | 22.33 cycles |
front end | 22.33 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 11.20 | 11.17 | 11.33 | 11.33 | 11.50 | 11.23 | 11.20 | 11.50 | 11.50 | 11.50 | 11.20 | 11.33 |
cycles | 11.20 | 11.17 | 11.33 | 11.33 | 11.50 | 11.23 | 11.20 | 11.50 | 11.50 | 11.50 | 11.20 | 11.33 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 22.37-22.44 |
Stall cycles | 0.00 |
Front-end | 22.33 |
Dispatch | 11.50 |
Overall L1 | 22.33 |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 55% |
load | 21% |
store | 100% |
mul | 50% |
add-sub | 0% |
fma | 50% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 71% |
all | 37% |
load | 17% |
store | 28% |
mul | 50% |
add-sub | 0% |
fma | 50% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 50% |
all | 11% |
load | 10% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 9% |
all | 19% |
load | 15% |
store | 25% |
mul | 18% |
add-sub | 12% |
fma | 18% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 21% |
all | 16% |
load | 14% |
store | 16% |
mul | 18% |
add-sub | 12% |
fma | 18% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 17% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
SUB $0x148,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RCX,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %EDX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RSI,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDI,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x358(%RDI),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RDI,-0xb0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 47f980 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE5startEv> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
LEA 0x128(%R13),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOVSXD %R12D,%RCX | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
LEA (%RCX,%RCX,2),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SAL $0x3,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
ADD 0x40(%R13),%RDX | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
CMP %ECX,0x124(%R13) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
CMOVE %RAX,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VMOVSD 0xf0(%R14),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVUPD (%RDX),%XMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMULSD 0xd8(%R14),%XMM1,%XMM3 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMOVUPD 0xe0(%R14),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VFMADD231SD 0x8(%RDX),%XMM0,%XMM3 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMOVDDUP 0x10(%RDX),%XMM4 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VFMADD231SD 0x108(%R14),%XMM4,%XMM3 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMOVSD 0xf8(%R14),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VSHUFPD $0x1,%XMM1,%XMM1,%XMM5 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VUNPCKLPD 0xe8(%R14),%XMM0,%XMM6 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 1 |
VMOVHPD 0x100(%R14),%XMM2,%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4-12 | 1 |
VMULPD %XMM5,%XMM6,%XMM2 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD213PD %XMM2,%XMM1,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD231PD 0x110(%R14),%XMM4,%XMM0 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VXORPD %XMM1,%XMM1,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VCMPPD $0x1,%XMM1,%XMM3,%XMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVSD 0xb9abe(%RIP),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VCMPPD $0x1,%XMM3,%XMM2,%XMM5 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VANDPD %XMM4,%XMM5,%XMM4 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VMOVD %XMM4,%EAX | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VXORPD %XMM4,%XMM4,%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVUPD %XMM4,-0x110(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
TEST $0x1,%AL | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JNE 434419 <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEE+0xf9> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VROUNDSD $0x9,%XMM3,%XMM3,%XMM4 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 1 |
VSUBSD %XMM4,%XMM3,%XMM3 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMOVUPD %XMM3,-0x110(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VXORPD %XMM3,%XMM3,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VCMPPD $0x1,%XMM3,%XMM0,%XMM3 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVMSKPD %XMM3,%EAX | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VROUNDSD $0x9,%XMM0,%XMM0,%XMM3 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 1 |
VSUBSD %XMM3,%XMM0,%XMM3 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VXORPD %XMM4,%XMM4,%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
TEST $0x1,%AL | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JNE 43443c <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEE+0x11c> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VMOVAPD %XMM3,%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
MOV %RBX,-0xa8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VSHUFPD $0x1,%XMM0,%XMM0,%XMM5 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VROUNDSD $0x9,%XMM5,%XMM5,%XMM6 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 1 |
VSUBSD %XMM6,%XMM5,%XMM6 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
TEST $0x2,%AL | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JNE 43445a <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEE+0x13a> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VMOVAPD %XMM6,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
MOV %R14,-0x68(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x30(%R14),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x70(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
TEST %EAX,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 4349cb <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEE+0x6ab> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VCMPSD $0x1,%XMM0,%XMM2,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VBLENDVPD %XMM0,%XMM4,%XMM3,%XMM0 | 3 | 1 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 2-3 | 1 |
VMOVUPD %XMM0,-0x130(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VCMPSD $0x1,%XMM5,%XMM2,%XMM0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VBLENDVPD %XMM0,%XMM1,%XMM6,%XMM0 | 3 | 1 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 2-3 | 1 |
VMOVUPD %XMM0,-0x120(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
MOV -0x68(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x2f8(%RAX),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,-0xc8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x310(%RAX),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,-0xc0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVSXD 0x40(%RAX),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (,%R12,8),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RAX,-0xb8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP $0x1,%R12 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R12,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
ADC $0,%R15 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
DECQ -0x70(%RBP) | 3 | 0.20 | 0.20 | 0.33 | 0.33 | 0.50 | 0.20 | 0.20 | 0.50 | 0.50 | 0.50 | 0.20 | 0.33 | 1 | 0.50 |
MOV %R15,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SHR $0x1,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV %RAX,-0xf0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R15,-0xf8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
AND $-0x2,%R15 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVSD 0xb99b8(%RIP),%XMM10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0xb6958(%RIP),%XMM11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JMP 434528 <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEE+0x208> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0xb0(%RBP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CALL 47fb70 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE4stopEv> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV -0x68(%RBP),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x30(%R12),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
TEST %EAX,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
MOV -0xa8(%RBP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JLE 434a63 <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEE+0x743> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
XOR %EBX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %R14D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 434a0c <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEE+0x6ec> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD $0x148,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
MOV %RAX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 409f50 <__clang_call_terminate> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼miniqmcreference::einspline_spo_ref | 23.13 | 26.04 |
▼Loop 745 - MultiBsplineRef.hpp:42-71 - exec– | 0.01 | 0.01 |
▼Loop 746 - MultiBsplineRef.hpp:63-71 - exec– | 0 | 0.01 |
▼Loop 747 - MultiBsplineRef.hpp:64-71 - exec– | 0 | 0 |
○Loop 748 - MultiBsplineRef.hpp:68-70 - exec | 23.1 | 25.52 |
○Loop 744 - einspline_spo_ref.hpp:183-187 - exec | 0 | 0.01 |