Function: _ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6 ... | Module: libqmcwfs.so | Source: einspline_spo_ref.hpp:172-189 [...] | Coverage: 22.5% |
---|
Function: _ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6 ... | Module: libqmcwfs.so | Source: einspline_spo_ref.hpp:172-189 [...] | Coverage: 22.5% |
---|
/home/eoseret/qaas_runs_CPU_9468/171-145-9236/intel/miniqmc/build/miniqmc/src/QMCWaveFunctions/einspline_spo_ref.hpp: 172 - 189 |
-------------------------------------------------------------------------------- |
172: ScopedTimer local_timer(timer); |
173: |
174: auto u = Lattice.toUnit_floor(P.activeR(iat)); |
175: for (int i = 0; i < nBlocks; ++i) |
176: MultiBsplineEvalRef::evaluate_v(einsplines[i], u[0], u[1], u[2], psi[i].data(), nSplinesPerBlock); |
177: } |
178: |
179: inline void evaluate(const ParticleSet& P, int iat, ValueVector_t& psi_v) |
180: { |
181: evaluate_v(P, iat); |
182: |
183: for (int i = 0; i < nBlocks; ++i) |
184: { |
185: // in real simulation, phase needs to be applied. Here just fake computation |
186: const int first = i * nBlocks; |
187: std::copy_n(psi[i].data(), std::min((i + 1) * nSplinesPerBlock, OrbitalSetSize) - first, psi_v.data() + first); |
188: } |
189: } |
/cluster/comp/gcc/13.2.0/include/c++/13.2.0/bits/stl_algo.h: 731 - 757 |
-------------------------------------------------------------------------------- |
731: { return std::copy(__first, __first + __n, __result); } |
[...] |
757: if (__n2 <= 0) |
/home/eoseret/qaas_runs_CPU_9468/171-145-9236/intel/miniqmc/build/miniqmc/src/Utilities/NewTimer.h: 242 - 249 |
-------------------------------------------------------------------------------- |
242: ScopeGuard(TIMER& t) : timer(t) { timer.start(); } |
[...] |
249: ~ScopeGuard() { timer.stop(); } |
/home/eoseret/qaas_runs_CPU_9468/171-145-9236/intel/miniqmc/build/miniqmc/src/Numerics/Spline2/MultiBsplineEvalHelper.hpp: 53 - 63 |
-------------------------------------------------------------------------------- |
53: if (x < 0) |
[...] |
60: ind = static_cast<int>(x); |
61: dx = x - ind; |
62: // upper bound |
63: if (ind > nmax) |
/home/eoseret/qaas_runs_CPU_9468/171-145-9236/intel/miniqmc/build/miniqmc/src/Particle/ParticleSet.h: 143 - 143 |
-------------------------------------------------------------------------------- |
143: return (active_ptcl_ == iat) ? active_pos_ : R[iat]; |
/home/eoseret/qaas_runs_CPU_9468/171-145-9236/intel/miniqmc/build/miniqmc/src/Numerics/Spline2/MultiBsplineData.hpp: 54 - 57 |
-------------------------------------------------------------------------------- |
54: a[0] = ((A00 * tx + A01) * tx + A02) * tx + A03; |
55: a[1] = ((A10 * tx + A11) * tx + A12) * tx + A13; |
56: a[2] = ((A20 * tx + A21) * tx + A22) * tx + A23; |
57: a[3] = ((A30 * tx + A31) * tx + A32) * tx + A33; |
/cluster/comp/gcc/13.2.0/include/c++/13.2.0/bits/stl_vector.h: 1258 - 1258 |
-------------------------------------------------------------------------------- |
1258: { return _M_data_ptr(this->_M_impl._M_start); } |
/cluster/comp/gcc/13.2.0/include/c++/13.2.0/bits/stl_algobase.h: 238 - 931 |
-------------------------------------------------------------------------------- |
238: if (__b < __a) |
[...] |
398: { *__to = *__from; } |
[...] |
436: if (__builtin_expect(_Num > 1, true)) |
437: __builtin_memmove(__result, __first, sizeof(_Tp) * _Num); |
[...] |
930: for (; __first != __last; ++__first) |
931: *__first = __tmp; |
/home/eoseret/qaas_runs_CPU_9468/171-145-9236/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/OhmmsVector.h: 229 - 229 |
-------------------------------------------------------------------------------- |
229: return X[i]; |
/home/eoseret/qaas_runs_CPU_9468/171-145-9236/intel/miniqmc/build/miniqmc/src/Numerics/Spline2/MultiBsplineRef.hpp: 42 - 71 |
-------------------------------------------------------------------------------- |
42: x -= spline_m->x_grid.start; |
43: y -= spline_m->y_grid.start; |
44: z -= spline_m->z_grid.start; |
45: T tx, ty, tz; |
46: int ix, iy, iz; |
47: spline2::getSplineBound(x * spline_m->x_grid.delta_inv, tx, ix, spline_m->x_grid.num - 1); |
48: spline2::getSplineBound(y * spline_m->y_grid.delta_inv, ty, iy, spline_m->y_grid.num - 1); |
49: spline2::getSplineBound(z * spline_m->z_grid.delta_inv, tz, iz, spline_m->z_grid.num - 1); |
[...] |
56: const intptr_t xs = spline_m->x_stride; |
57: const intptr_t ys = spline_m->y_stride; |
58: const intptr_t zs = spline_m->z_stride; |
59: |
60: constexpr T zero(0); |
61: std::fill(vals, vals + num_splines, zero); |
62: |
63: for (size_t i = 0; i < 4; i++) |
64: for (size_t j = 0; j < 4; j++) |
65: { |
66: const T pre00 = a[i] * b[j]; |
67: const T* restrict coefs = spline_m->coefs + (ix + i) * xs + (iy + j) * ys + iz * zs; |
68: for (size_t n = 0; n < num_splines; n++) |
69: vals[n] += pre00 * |
70: (c[0] * coefs[n] + c[1] * coefs[n + zs] + c[2] * coefs[n + 2 * zs] + |
71: c[3] * coefs[n + 3 * zs]); |
0x4b330 PUSH %RBP |
0x4b331 MOV %RSP,%RBP |
0x4b334 PUSH %R15 |
0x4b336 MOV %RCX,%R15 |
0x4b339 PUSH %R14 |
0x4b33b PUSH %R13 |
0x4b33d MOVSXD %EDX,%R13 |
0x4b340 PUSH %R12 |
0x4b342 MOV %RSI,%R12 |
0x4b345 PUSH %RBX |
0x4b346 AND $-0x20,%RSP |
0x4b34a SUB $0x100,%RSP |
0x4b351 MOV 0x358(%RDI),%RBX |
0x4b358 MOV %RDI,0x98(%RSP) |
0x4b360 MOV %RBX,%RDI |
0x4b363 CALL 8590 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE5startEv@plt> |
0x4b368 MOV 0x98(%RSP),%R8 |
0x4b370 LEA 0x128(%R12),%RDX |
0x4b378 LEA 0x48(%R8),%RSI |
0x4b37c CMP 0x124(%R12),%R13D |
0x4b384 JE 4b394 |
0x4b386 MOV 0x40(%R12),%RAX |
0x4b38b LEA (%R13,%R13,2),%RDX |
0x4b390 LEA (%RAX,%RDX,8),%RDX |
0x4b394 LEA 0xa0(%RSP),%RDI |
0x4b39c MOV %R8,0x98(%RSP) |
0x4b3a4 CALL 13670 <_ZNK11qmcplusplus14CrystalLatticeIdLj3EE12toUnit_floorIdEENS_10TinyVectorIdLj3EEERKNS3_IT_Lj3EEE> |
0x4b3a9 MOV 0x98(%RSP),%RDX |
0x4b3b1 MOVSXD 0x30(%RDX),%RCX |
0x4b3b5 TEST %ECX,%ECX |
0x4b3b7 JLE 4b952 |
0x4b3bd MOV 0x2f8(%RDX),%R9 |
0x4b3c4 MOVSXD 0x40(%RDX),%R14 |
0x4b3c8 LEA 0xc0(%RSP),%R11 |
0x4b3d0 MOV %RBX,0x10(%RSP) |
0x4b3d5 MOV 0x310(%RDX),%RSI |
0x4b3dc MOV %R11,0x18(%RSP) |
0x4b3e1 LEA 0xe0(%RSP),%R12 |
0x4b3e9 LEA (,%R14,8),%RDI |
0x4b3f1 LEA (%R9,%RCX,8),%R10 |
0x4b3f5 VMOVSD 0xb0(%RSP),%XMM6 |
0x4b3fe VMOVSD 0xa8(%RSP),%XMM0 |
0x4b407 VMOVSD 0xa0(%RSP),%XMM1 |
0x4b410 MOV %RDI,0x80(%RSP) |
0x4b418 MOV %RSI,0x78(%RSP) |
0x4b41d MOV %R9,0x88(%RSP) |
0x4b425 MOV %R10,0x20(%RSP) |
0x4b42a MOV %RDX,0x8(%RSP) |
0x4b42f MOV %R15,(%RSP) |
0x4b433 MOV %R12,%R15 |
0x4b436 VMOVSD %XMM6,0x38(%RSP) |
0x4b43c VMOVSD %XMM0,0x30(%RSP) |
0x4b442 VMOVSD %XMM1,0x28(%RSP) |
(558) 0x4b448 VMOVSD 0x28(%RSP),%XMM4 |
(558) 0x4b44e VXORPD %XMM8,%XMM8,%XMM8 |
(558) 0x4b453 MOV 0x78(%RSP),%RBX |
(558) 0x4b458 MOV 0x88(%RSP),%R13 |
(558) 0x4b460 VMOVSD 0x30(%RSP),%XMM2 |
(558) 0x4b466 VMOVSD 0x38(%RSP),%XMM3 |
(558) 0x4b46c MOV (%RBX),%RCX |
(558) 0x4b46f MOV (%R13),%R12 |
(558) 0x4b473 VSUBSD 0x28(%R12),%XMM4,%XMM5 |
(558) 0x4b47a VMULSD 0x48(%R12),%XMM5,%XMM7 |
(558) 0x4b481 VSUBSD 0x50(%R12),%XMM2,%XMM10 |
(558) 0x4b488 VSUBSD 0x78(%R12),%XMM3,%XMM11 |
(558) 0x4b48f MOV 0x38(%R12),%EAX |
(558) 0x4b494 VCOMISD %XMM7,%XMM8 |
(558) 0x4b498 JA 4ba38 |
(558) 0x4b49e VCVTTSD2SI %XMM7,%R8D |
(558) 0x4b4a2 DEC %EAX |
(558) 0x4b4a4 CMP %R8D,%EAX |
(558) 0x4b4a7 JL 4ba51 |
(558) 0x4b4ad VROUNDSD $0xb,%XMM7,%XMM7,%XMM9 |
(558) 0x4b4b3 VSUBSD %XMM9,%XMM7,%XMM12 |
(558) 0x4b4b8 VMOVSD 0x15dc8(%RIP),%XMM15 |
(558) 0x4b4c0 MOVSXD %R8D,%RDX |
(558) 0x4b4c3 VMULSD 0x15db5(%RIP),%XMM12,%XMM14 |
(558) 0x4b4cb VMOVSD 0x1514d(%RIP),%XMM1 |
(558) 0x4b4d3 MOV %RDX,0x98(%RSP) |
(558) 0x4b4db VXORPD 0x1529d(%RIP),%XMM14,%XMM3 |
(558) 0x4b4e3 VMULSD %XMM12,%XMM12,%XMM13 |
(558) 0x4b4e8 VMOVSD %XMM12,%XMM12,%XMM2 |
(558) 0x4b4ec VMOVSD %XMM12,%XMM12,%XMM6 |
(558) 0x4b4f0 VFMADD213SD 0x15daf(%RIP),%XMM15,%XMM6 |
(558) 0x4b4f9 VFNMADD132SD %XMM15,%XMM15,%XMM2 |
(558) 0x4b4fe VFMADD213SD 0x15121(%RIP),%XMM13,%XMM6 |
(558) 0x4b507 VADDSD %XMM15,%XMM14,%XMM0 |
(558) 0x4b50c VFMADD213SD 0x15d7b(%RIP),%XMM12,%XMM0 |
(558) 0x4b515 VMULSD %XMM13,%XMM3,%XMM4 |
(558) 0x4b51a VFMADD231SD %XMM2,%XMM12,%XMM15 |
(558) 0x4b51f VFMADD132SD %XMM12,%XMM1,%XMM0 |
(558) 0x4b524 VFMADD132SD %XMM15,%XMM1,%XMM12 |
(558) 0x4b529 VUNPCKLPD %XMM6,%XMM0,%XMM7 |
(558) 0x4b52d VUNPCKLPD %XMM4,%XMM12,%XMM5 |
(558) 0x4b531 VINSERTF128 $0x1,%XMM5,%YMM7,%YMM12 |
(558) 0x4b537 VMULSD 0x70(%R12),%XMM10,%XMM10 |
(558) 0x4b53e VXORPD %XMM8,%XMM8,%XMM8 |
(558) 0x4b543 MOV 0x60(%R12),%EAX |
(558) 0x4b548 VCOMISD %XMM10,%XMM8 |
(558) 0x4b54d JA 4ba1f |
(558) 0x4b553 VCVTTSD2SI %XMM10,%EDI |
(558) 0x4b558 DEC %EAX |
(558) 0x4b55a CMP %EDI,%EAX |
(558) 0x4b55c JL 4ba94 |
(558) 0x4b562 VROUNDSD $0xb,%XMM10,%XMM10,%XMM9 |
(558) 0x4b568 VSUBSD %XMM9,%XMM10,%XMM13 |
(558) 0x4b56d VMOVSD 0x15d13(%RIP),%XMM6 |
(558) 0x4b575 MOVSXD %EDI,%RSI |
(558) 0x4b578 VMOVSD 0x150a0(%RIP),%XMM15 |
(558) 0x4b580 MOV %RSI,0x90(%RSP) |
(558) 0x4b588 VMULSD %XMM15,%XMM13,%XMM0 |
(558) 0x4b58d VMOVSD %XMM13,%XMM13,%XMM3 |
(558) 0x4b591 VMOVSD %XMM13,%XMM13,%XMM1 |
(558) 0x4b595 VMULSD %XMM13,%XMM13,%XMM14 |
(558) 0x4b59a VFNMADD132SD %XMM6,%XMM6,%XMM3 |
(558) 0x4b59f VFMADD213SD 0x15d00(%RIP),%XMM6,%XMM1 |
(558) 0x4b5a8 VFMADD213SD 0x15077(%RIP),%XMM14,%XMM1 |
(558) 0x4b5b1 VSUBSD %XMM0,%XMM6,%XMM2 |
(558) 0x4b5b5 VFMADD213SD 0x15cd2(%RIP),%XMM13,%XMM2 |
(558) 0x4b5be VMULSD %XMM14,%XMM0,%XMM4 |
(558) 0x4b5c3 VFMADD231SD %XMM3,%XMM13,%XMM6 |
(558) 0x4b5c8 VFMADD132SD %XMM13,%XMM15,%XMM2 |
(558) 0x4b5cd VFMADD132SD %XMM6,%XMM15,%XMM13 |
(558) 0x4b5d2 VUNPCKLPD %XMM1,%XMM2,%XMM7 |
(558) 0x4b5d6 VUNPCKLPD %XMM4,%XMM13,%XMM5 |
(558) 0x4b5da VINSERTF128 $0x1,%XMM5,%YMM7,%YMM10 |
(558) 0x4b5e0 VMULSD 0x98(%R12),%XMM11,%XMM11 |
(558) 0x4b5ea VXORPD %XMM8,%XMM8,%XMM8 |
(558) 0x4b5ef MOV 0x88(%R12),%EAX |
(558) 0x4b5f7 VCOMISD %XMM11,%XMM8 |
(558) 0x4b5fc JA 4b9f5 |
(558) 0x4b602 VCVTTSD2SI %XMM11,%R9D |
(558) 0x4b607 DEC %EAX |
(558) 0x4b609 CMP %R9D,%EAX |
(558) 0x4b60c JL 4ba68 |
(558) 0x4b612 VROUNDSD $0xb,%XMM11,%XMM11,%XMM9 |
(558) 0x4b618 VSUBSD %XMM9,%XMM11,%XMM13 |
(558) 0x4b61d VMOVSD 0x15c63(%RIP),%XMM1 |
(558) 0x4b625 MOVSXD %R9D,%R10 |
(558) 0x4b628 VMULSD 0x15c50(%RIP),%XMM13,%XMM14 |
(558) 0x4b630 VMOVSD 0x14fe8(%RIP),%XMM15 |
(558) 0x4b638 MOV %R10,0x70(%RSP) |
(558) 0x4b63d VXORPD 0x1513b(%RIP),%XMM14,%XMM2 |
(558) 0x4b645 VMULSD %XMM13,%XMM13,%XMM0 |
(558) 0x4b64a VMOVSD %XMM13,%XMM13,%XMM6 |
(558) 0x4b64e VMOVSD %XMM13,%XMM13,%XMM5 |
(558) 0x4b652 VFMADD213SD 0x15c4d(%RIP),%XMM1,%XMM5 |
(558) 0x4b65b VFNMADD132SD %XMM1,%XMM1,%XMM6 |
(558) 0x4b660 VFMADD213SD 0x14fbf(%RIP),%XMM0,%XMM5 |
(558) 0x4b669 VADDSD %XMM1,%XMM14,%XMM4 |
(558) 0x4b66d VFMADD213SD 0x15c1a(%RIP),%XMM13,%XMM4 |
(558) 0x4b676 VMULSD %XMM0,%XMM2,%XMM3 |
(558) 0x4b67a VFMADD231SD %XMM6,%XMM13,%XMM1 |
(558) 0x4b67f VFMADD132SD %XMM13,%XMM15,%XMM4 |
(558) 0x4b684 VFMADD132SD %XMM13,%XMM15,%XMM1 |
(558) 0x4b689 CMPQ $0,0x80(%RSP) |
(558) 0x4b692 MOV 0x10(%R12),%R8 |
(558) 0x4b697 VMOVAPD %YMM12,0xc0(%RSP) |
(558) 0x4b6a0 MOV 0x18(%R12),%R10 |
(558) 0x4b6a5 MOV 0x20(%R12),%RBX |
(558) 0x4b6aa VMOVAPD %YMM10,0xe0(%RSP) |
(558) 0x4b6b3 JE 4b711 |
(558) 0x4b6b5 MOV %R10,0x60(%RSP) |
(558) 0x4b6ba MOV 0x80(%RSP),%RDX |
(558) 0x4b6c2 XOR %ESI,%ESI |
(558) 0x4b6c4 MOV %RCX,%RDI |
(558) 0x4b6c7 MOV %R8,0x68(%RSP) |
(558) 0x4b6cc VMOVSD %XMM3,0x40(%RSP) |
(558) 0x4b6d2 VMOVSD %XMM1,0x48(%RSP) |
(558) 0x4b6d8 VMOVSD %XMM5,0x50(%RSP) |
(558) 0x4b6de VMOVSD %XMM4,0x58(%RSP) |
(558) 0x4b6e4 VZEROUPPER |
(558) 0x4b6e7 CALL 80e0 <memset@plt> |
(558) 0x4b6ec MOV 0x68(%RSP),%R8 |
(558) 0x4b6f1 MOV 0x60(%RSP),%R10 |
(558) 0x4b6f6 VMOVSD 0x58(%RSP),%XMM4 |
(558) 0x4b6fc VMOVSD 0x50(%RSP),%XMM5 |
(558) 0x4b702 MOV %RAX,%RCX |
(558) 0x4b705 VMOVSD 0x48(%RSP),%XMM1 |
(558) 0x4b70b VMOVSD 0x40(%RSP),%XMM3 |
(558) 0x4b711 MOV 0x98(%RSP),%RDX |
(558) 0x4b719 MOV 0x90(%RSP),%R9 |
(558) 0x4b721 LEA (,%R8,8),%R13 |
(558) 0x4b729 MOV 0x70(%RSP),%RAX |
(558) 0x4b72e MOV 0x8(%R12),%R12 |
(558) 0x4b733 MOV %R13,0x90(%RSP) |
(558) 0x4b73b IMUL %R8,%RDX |
(558) 0x4b73f MOV 0x18(%RSP),%R11 |
(558) 0x4b744 IMUL %R10,%R9 |
(558) 0x4b748 SAL $0x3,%R10 |
(558) 0x4b74c IMUL %RBX,%RAX |
(558) 0x4b750 LEA (%RDX,%RBX,1),%RSI |
(558) 0x4b754 SAL $0x3,%RBX |
(558) 0x4b758 ADD %RSI,%R9 |
(558) 0x4b75b ADD %R9,%RAX |
(558) 0x4b75e LEA (%R12,%RAX,8),%R8 |
(558) 0x4b762 LEA 0x100(%RSP),%R12 |
(557) 0x4b76a LEA (%RBX,%R8,1),%RSI |
(557) 0x4b76e VMOVSD (%R11),%XMM12 |
(557) 0x4b773 MOV %R15,%R13 |
(557) 0x4b776 MOV %R8,%RDX |
(557) 0x4b779 MOV %R8,0x98(%RSP) |
(557) 0x4b781 LEA (%RSI,%RBX,1),%RDI |
(556) 0x4b785 VMULSD (%R13),%XMM12,%XMM10 |
(556) 0x4b78b MOV %RDX,%R8 |
(556) 0x4b78e XOR %EAX,%EAX |
(556) 0x4b790 SUB %RBX,%R8 |
(556) 0x4b793 TEST %R14,%R14 |
(556) 0x4b796 JE 4b8e9 |
(556) 0x4b79c MOV %R14,%R9 |
(556) 0x4b79f AND $0x3,%R9D |
(556) 0x4b7a3 JE 4b833 |
(556) 0x4b7a9 CMP $0x1,%R9 |
(556) 0x4b7ad JE 4b802 |
(556) 0x4b7af CMP $0x2,%R9 |
(556) 0x4b7b3 JE 4b7d9 |
(556) 0x4b7b5 VMULSD (%RDX),%XMM5,%XMM11 |
(556) 0x4b7b9 MOV $0x1,%EAX |
(556) 0x4b7be VMULSD (%RDI),%XMM3,%XMM7 |
(556) 0x4b7c2 VFMADD231SD (%R8),%XMM4,%XMM11 |
(556) 0x4b7c7 VFMADD231SD (%RSI),%XMM1,%XMM7 |
(556) 0x4b7cc VADDSD %XMM7,%XMM11,%XMM8 |
(556) 0x4b7d0 VFMADD213SD (%RCX),%XMM10,%XMM8 |
(556) 0x4b7d5 VMOVSD %XMM8,(%RCX) |
(556) 0x4b7d9 VMULSD (%RDX,%RAX,8),%XMM5,%XMM9 |
(556) 0x4b7de VMULSD (%RDI,%RAX,8),%XMM3,%XMM13 |
(556) 0x4b7e3 VFMADD231SD (%R8,%RAX,8),%XMM4,%XMM9 |
(556) 0x4b7e9 VFMADD231SD (%RSI,%RAX,8),%XMM1,%XMM13 |
(556) 0x4b7ef VADDSD %XMM13,%XMM9,%XMM14 |
(556) 0x4b7f4 VFMADD213SD (%RCX,%RAX,8),%XMM10,%XMM14 |
(556) 0x4b7fa VMOVSD %XMM14,(%RCX,%RAX,8) |
(556) 0x4b7ff INC %RAX |
(556) 0x4b802 VMULSD (%RDX,%RAX,8),%XMM5,%XMM15 |
(556) 0x4b807 VMULSD (%RDI,%RAX,8),%XMM3,%XMM0 |
(556) 0x4b80c VFMADD231SD (%R8,%RAX,8),%XMM4,%XMM15 |
(556) 0x4b812 VFMADD231SD (%RSI,%RAX,8),%XMM1,%XMM0 |
(556) 0x4b818 VADDSD %XMM0,%XMM15,%XMM6 |
(556) 0x4b81c VFMADD213SD (%RCX,%RAX,8),%XMM10,%XMM6 |
(556) 0x4b822 VMOVSD %XMM6,(%RCX,%RAX,8) |
(556) 0x4b827 INC %RAX |
(556) 0x4b82a CMP %RAX,%R14 |
(556) 0x4b82d JE 4b8e9 |
(559) 0x4b833 VMULSD (%RDX,%RAX,8),%XMM5,%XMM2 |
(559) 0x4b838 VMULSD (%RDI,%RAX,8),%XMM3,%XMM11 |
(559) 0x4b83d VFMADD231SD (%R8,%RAX,8),%XMM4,%XMM2 |
(559) 0x4b843 VMULSD 0x8(%RDX,%RAX,8),%XMM5,%XMM8 |
(559) 0x4b849 VFMADD231SD (%RSI,%RAX,8),%XMM1,%XMM11 |
(559) 0x4b84f VMULSD 0x8(%RDI,%RAX,8),%XMM3,%XMM9 |
(559) 0x4b855 VFMADD231SD 0x8(%R8,%RAX,8),%XMM4,%XMM8 |
(559) 0x4b85c VMULSD 0x10(%RDX,%RAX,8),%XMM5,%XMM14 |
(559) 0x4b862 VFMADD231SD 0x8(%RSI,%RAX,8),%XMM1,%XMM9 |
(559) 0x4b869 VMULSD 0x10(%RDI,%RAX,8),%XMM3,%XMM15 |
(559) 0x4b86f VFMADD231SD 0x10(%R8,%RAX,8),%XMM4,%XMM14 |
(559) 0x4b876 VMULSD 0x18(%RDX,%RAX,8),%XMM5,%XMM6 |
(559) 0x4b87c VFMADD231SD 0x10(%RSI,%RAX,8),%XMM1,%XMM15 |
(559) 0x4b883 VADDSD %XMM11,%XMM2,%XMM7 |
(559) 0x4b888 VMULSD 0x18(%RDI,%RAX,8),%XMM3,%XMM2 |
(559) 0x4b88e VFMADD231SD 0x18(%R8,%RAX,8),%XMM4,%XMM6 |
(559) 0x4b895 VFMADD231SD 0x18(%RSI,%RAX,8),%XMM1,%XMM2 |
(559) 0x4b89c VFMADD213SD (%RCX,%RAX,8),%XMM10,%XMM7 |
(559) 0x4b8a2 VADDSD %XMM9,%XMM8,%XMM13 |
(559) 0x4b8a7 VFMADD213SD 0x8(%RCX,%RAX,8),%XMM10,%XMM13 |
(559) 0x4b8ae VADDSD %XMM15,%XMM14,%XMM0 |
(559) 0x4b8b3 VFMADD213SD 0x10(%RCX,%RAX,8),%XMM10,%XMM0 |
(559) 0x4b8ba VADDSD %XMM2,%XMM6,%XMM11 |
(559) 0x4b8be VFMADD213SD 0x18(%RCX,%RAX,8),%XMM10,%XMM11 |
(559) 0x4b8c5 VMOVSD %XMM7,(%RCX,%RAX,8) |
(559) 0x4b8ca VMOVSD %XMM13,0x8(%RCX,%RAX,8) |
(559) 0x4b8d0 VMOVSD %XMM0,0x10(%RCX,%RAX,8) |
(559) 0x4b8d6 VMOVSD %XMM11,0x18(%RCX,%RAX,8) |
(559) 0x4b8dc ADD $0x4,%RAX |
(559) 0x4b8e0 CMP %RAX,%R14 |
(559) 0x4b8e3 JNE 4b833 |
(556) 0x4b8e9 ADD $0x8,%R13 |
(556) 0x4b8ed ADD %R10,%RDX |
(556) 0x4b8f0 ADD %R10,%RSI |
(556) 0x4b8f3 ADD %R10,%RDI |
(556) 0x4b8f6 CMP %R12,%R13 |
(556) 0x4b8f9 JNE 4b785 |
(557) 0x4b8ff MOV 0x98(%RSP),%R8 |
(557) 0x4b907 MOV 0x90(%RSP),%R13 |
(557) 0x4b90f ADD $0x8,%R11 |
(557) 0x4b913 ADD %R13,%R8 |
(557) 0x4b916 CMP %R11,%R15 |
(557) 0x4b919 JNE 4b76a |
(558) 0x4b91f ADDQ $0x8,0x88(%RSP) |
(558) 0x4b928 MOV 0x88(%RSP),%RSI |
(558) 0x4b930 ADDQ $0x18,0x78(%RSP) |
(558) 0x4b936 CMP %RSI,0x20(%RSP) |
(558) 0x4b93b JNE 4b448 |
0x4b941 MOV 0x10(%RSP),%RBX |
0x4b946 MOV 0x8(%RSP),%RDX |
0x4b94b MOV (%RSP),%R15 |
0x4b94f VZEROUPPER |
0x4b952 MOV %RBX,%RDI |
0x4b955 MOV %RDX,0x98(%RSP) |
0x4b95d XOR %R12D,%R12D |
0x4b960 XOR %R14D,%R14D |
0x4b963 CALL 8480 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE4stopEv@plt> |
0x4b968 MOV 0x98(%RSP),%R13 |
0x4b970 MOV 0x30(%R13),%ECX |
0x4b974 TEST %ECX,%ECX |
0x4b976 JLE 4b9e6 |
(555) 0x4b978 MOV 0x40(%R13),%R10D |
(555) 0x4b97c MOV %R14D,%EDI |
(555) 0x4b97f INC %R14D |
(555) 0x4b982 VMOVD 0x8(%R13),%XMM3 |
(555) 0x4b988 IMUL %ECX,%EDI |
(555) 0x4b98b IMUL %R14D,%R10D |
(555) 0x4b98f VMOVD %EDI,%XMM12 |
(555) 0x4b993 VMOVD %R10D,%XMM1 |
(555) 0x4b998 VPMINSD %XMM3,%XMM1,%XMM4 |
(555) 0x4b99d VPSUBD %XMM12,%XMM4,%XMM5 |
(555) 0x4b9a2 VMOVD %XMM5,%EAX |
(555) 0x4b9a6 TEST %EAX,%EAX |
(555) 0x4b9a8 JLE 4b9dd |
(555) 0x4b9aa MOV 0x18(%R15),%R9 |
(555) 0x4b9ae MOV 0x310(%R13),%R8 |
(555) 0x4b9b5 CLTQ |
(555) 0x4b9b7 MOVSXD %EDI,%R11 |
(555) 0x4b9ba LEA (,%RAX,8),%RDX |
(555) 0x4b9c2 MOV (%R8,%R12,1),%RSI |
(555) 0x4b9c6 LEA (%R9,%R11,8),%RDI |
(555) 0x4b9ca CMP $0x8,%RDX |
(555) 0x4b9ce JE 4baab |
(555) 0x4b9d4 CALL 8090 <memmove@plt> |
(555) 0x4b9d9 MOV 0x30(%R13),%ECX |
(555) 0x4b9dd ADD $0x18,%R12 |
(555) 0x4b9e1 CMP %ECX,%R14D |
(555) 0x4b9e4 JL 4b978 |
0x4b9e6 LEA -0x28(%RBP),%RSP |
0x4b9ea POP %RBX |
0x4b9eb POP %R12 |
0x4b9ed POP %R13 |
0x4b9ef POP %R14 |
0x4b9f1 POP %R15 |
0x4b9f3 POP %RBP |
0x4b9f4 RET |
(558) 0x4b9f5 VMOVSD 0x14c23(%RIP),%XMM1 |
(558) 0x4b9fd MOV 0x14c24(%RIP),%RBX |
(558) 0x4ba04 VXORPD %XMM3,%XMM3,%XMM3 |
(558) 0x4ba08 MOVQ $0,0x70(%RSP) |
(558) 0x4ba11 VMOVQ %RBX,%XMM5 |
(558) 0x4ba16 VMOVSD %XMM1,%XMM1,%XMM4 |
(558) 0x4ba1a JMP 4b689 |
(558) 0x4ba1f MOVQ $0,0x90(%RSP) |
(558) 0x4ba2b VMOVAPD 0x14bed(%RIP),%YMM10 |
(558) 0x4ba33 JMP 4b5e0 |
(558) 0x4ba38 MOVQ $0,0x98(%RSP) |
(558) 0x4ba44 VMOVAPD 0x14bd4(%RIP),%YMM12 |
(558) 0x4ba4c JMP 4b537 |
(558) 0x4ba51 CLTQ |
(558) 0x4ba53 VMOVAPD 0x14be5(%RIP),%YMM12 |
(558) 0x4ba5b MOV %RAX,0x98(%RSP) |
(558) 0x4ba63 JMP 4b537 |
(558) 0x4ba68 MOV 0x14bb9(%RIP),%R11 |
(558) 0x4ba6f CLTQ |
(558) 0x4ba71 VXORPD %XMM4,%XMM4,%XMM4 |
(558) 0x4ba75 VMOVSD 0x14bdb(%RIP),%XMM3 |
(558) 0x4ba7d MOV %RAX,0x70(%RSP) |
(558) 0x4ba82 VMOVSD 0x14bbe(%RIP),%XMM5 |
(558) 0x4ba8a VMOVQ %R11,%XMM1 |
(558) 0x4ba8f JMP 4b689 |
(558) 0x4ba94 CLTQ |
(558) 0x4ba96 VMOVAPD 0x14ba2(%RIP),%YMM10 |
(558) 0x4ba9e MOV %RAX,0x90(%RSP) |
(558) 0x4baa6 JMP 4b5e0 |
(555) 0x4baab VMOVSD (%RSI),%XMM10 |
(555) 0x4baaf VMOVSD %XMM10,(%RDI) |
(555) 0x4bab3 JMP 4b9dd |
0x4bab8 NOPL (%RAX,%RAX,1) |
Path / |
Source file and lines | einspline_spo_ref.hpp:172-189 |
Module | libqmcwfs.so |
nb instructions | 75 |
nb uops | 79 |
loop length | 358 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 3 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 18 |
micro-operation queue | 13.17 cycles |
front end | 13.17 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 2.00 | 2.13 | 8.00 | 8.00 | 11.50 | 1.93 | 2.00 | 11.50 | 11.50 | 11.50 | 1.93 | 8.00 |
cycles | 2.00 | 2.13 | 8.00 | 8.00 | 11.50 | 1.93 | 2.00 | 11.50 | 11.50 | 11.50 | 1.93 | 8.00 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 12.66 |
Stall cycles | 0.00 |
Front-end | 13.17 |
Dispatch | 11.50 |
Overall L1 | 13.17 |
all | 5% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 20% |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 3% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 20% |
all | 12% |
load | 11% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 12% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 12% |
load | 11% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RCX,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOVSXD %EDX,%R13 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSI,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x20,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0x100,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x358(%RDI),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RDI,0x98(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RBX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 8590 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE5startEv@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV 0x98(%RSP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x128(%R12),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x48(%R8),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP 0x124(%R12),%R13D | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JE 4b394 <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEE+0x64> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x40(%R12),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%R13,%R13,2),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA (%RAX,%RDX,8),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0xa0(%RSP),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R8,0x98(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 13670 <_ZNK11qmcplusplus14CrystalLatticeIdLj3EE12toUnit_floorIdEENS_10TinyVectorIdLj3EEERKNS3_IT_Lj3EEE> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV 0x98(%RSP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVSXD 0x30(%RDX),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
TEST %ECX,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 4b952 <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEE+0x622> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x2f8(%RDX),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVSXD 0x40(%RDX),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0xc0(%RSP),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RBX,0x10(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x310(%RDX),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R11,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA 0xe0(%RSP),%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA (,%R14,8),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA (%R9,%RCX,8),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVSD 0xb0(%RSP),%XMM6 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0xa8(%RSP),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0xa0(%RSP),%XMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RDI,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RSI,0x78(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R9,0x88(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R10,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDX,0x8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R15,(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R12,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVSD %XMM6,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVSD %XMM0,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVSD %XMM1,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%RSP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RSP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RSP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
MOV %RBX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDX,0x98(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
XOR %R12D,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %R14D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CALL 8480 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE4stopEv@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV 0x98(%RSP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x30(%R13),%ECX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
TEST %ECX,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 4b9e6 <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEE+0x6b6> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | einspline_spo_ref.hpp:172-189 |
Module | libqmcwfs.so |
nb instructions | 75 |
nb uops | 79 |
loop length | 358 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 3 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 18 |
micro-operation queue | 13.17 cycles |
front end | 13.17 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 2.00 | 2.13 | 8.00 | 8.00 | 11.50 | 1.93 | 2.00 | 11.50 | 11.50 | 11.50 | 1.93 | 8.00 |
cycles | 2.00 | 2.13 | 8.00 | 8.00 | 11.50 | 1.93 | 2.00 | 11.50 | 11.50 | 11.50 | 1.93 | 8.00 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 12.66 |
Stall cycles | 0.00 |
Front-end | 13.17 |
Dispatch | 11.50 |
Overall L1 | 13.17 |
all | 5% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 20% |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 3% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 20% |
all | 12% |
load | 11% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 12% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 12% |
load | 11% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RCX,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOVSXD %EDX,%R13 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSI,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x20,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0x100,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x358(%RDI),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RDI,0x98(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RBX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 8590 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE5startEv@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV 0x98(%RSP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x128(%R12),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x48(%R8),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP 0x124(%R12),%R13D | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JE 4b394 <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEE+0x64> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x40(%R12),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%R13,%R13,2),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA (%RAX,%RDX,8),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0xa0(%RSP),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R8,0x98(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 13670 <_ZNK11qmcplusplus14CrystalLatticeIdLj3EE12toUnit_floorIdEENS_10TinyVectorIdLj3EEERKNS3_IT_Lj3EEE> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV 0x98(%RSP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVSXD 0x30(%RDX),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
TEST %ECX,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 4b952 <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEE+0x622> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x2f8(%RDX),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVSXD 0x40(%RDX),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0xc0(%RSP),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RBX,0x10(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x310(%RDX),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R11,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA 0xe0(%RSP),%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA (,%R14,8),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA (%R9,%RCX,8),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVSD 0xb0(%RSP),%XMM6 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0xa8(%RSP),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0xa0(%RSP),%XMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RDI,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RSI,0x78(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R9,0x88(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R10,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDX,0x8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R15,(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R12,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVSD %XMM6,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVSD %XMM0,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVSD %XMM1,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%RSP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RSP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RSP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
MOV %RBX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDX,0x98(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
XOR %R12D,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %R14D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CALL 8480 <_ZN11qmcplusplus9TimerTypeINSt6chrono3_V212system_clockEE4stopEv@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV 0x98(%RSP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x30(%R13),%ECX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
TEST %ECX,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 4b9e6 <_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEE+0x6b6> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼_ZN16miniqmcreference17einspline_spo_refIdE8evaluateERKN11qmcplusplus11ParticleSetEiRNS2_6VectorIdSaIdEEE– | 22.5 | 25.01 |
▼Loop 558 - einspline_spo_ref.hpp:175-189 - libqmcwfs.so– | 0.01 | 0.02 |
▼Loop 557 - MultiBsplineRef.hpp:63-71 - libqmcwfs.so– | 0 | 0 |
▼Loop 556 - MultiBsplineRef.hpp:64-71 - libqmcwfs.so– | 0.01 | 0.01 |
○Loop 559 - MultiBsplineRef.hpp:68-71 - libqmcwfs.so | 22.46 | 23.93 |
○Loop 555 - einspline_spo_ref.hpp:183-187 - libqmcwfs.so | 0 | 0 |