Function: _ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE9computeU3ERKN ... | Module: libqmcwfs.so | Source: TwoBodyJastrowRef.h:254-279 [...] | Coverage: 0.56% |
---|
Function: _ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE9computeU3ERKN ... | Module: libqmcwfs.so | Source: TwoBodyJastrowRef.h:254-279 [...] | Coverage: 0.56% |
---|
/home/eoseret/qaas_runs_CPU_9468/171-145-9236/intel/miniqmc/build/miniqmc/src/QMCWaveFunctions/Jastrow/TwoBodyJastrowRef.h: 254 - 279 |
-------------------------------------------------------------------------------- |
254: inline void TwoBodyJastrowRef<FT>::computeU3(const ParticleSet& P, |
[...] |
262: const int jelmax = triangle ? iat : N; |
[...] |
268: const int igt = P.GroupID[iat] * NumGroups; |
269: for (int jg = 0; jg < NumGroups; ++jg) |
270: { |
271: const FuncType& f2(*F[igt + jg]); |
[...] |
279: } |
/home/eoseret/qaas_runs_CPU_9468/171-145-9236/intel/miniqmc/build/miniqmc/src/Particle/ParticleSet.h: 313 - 313 |
-------------------------------------------------------------------------------- |
313: inline int first(int igroup) const { return (*group_offsets_)[igroup]; } |
/cluster/comp/gcc/13.2.0/include/c++/13.2.0/bits/stl_vector.h: 1145 - 1145 |
-------------------------------------------------------------------------------- |
1145: return *(this->_M_impl._M_start + __n); |
/cluster/comp/gcc/13.2.0/include/c++/13.2.0/bits/stl_algobase.h: 238 - 1128 |
-------------------------------------------------------------------------------- |
238: if (__b < __a) |
[...] |
931: *__first = __tmp; |
[...] |
1123: if (__n <= 0) |
1124: return __first; |
1125: |
1126: __glibcxx_requires_can_increment(__first, __n); |
1127: |
1128: std::__fill_a(__first, __first + __n, __value); |
/home/eoseret/qaas_runs_CPU_9468/171-145-9236/intel/miniqmc/build/miniqmc/src/Numerics/OhmmsPETE/OhmmsVector.h: 221 - 223 |
-------------------------------------------------------------------------------- |
221: inline Type_t& operator[](size_t i) |
222: { |
223: return X[i]; |
/home/eoseret/qaas_runs_CPU_9468/171-145-9236/intel/miniqmc/build/miniqmc/src/QMCWaveFunctions/Jastrow/BsplineFunctor.h: 276 - 336 |
-------------------------------------------------------------------------------- |
276: real_type dSquareDeltaRinv = DeltaRInv * DeltaRInv; |
277: constexpr real_type cOne(1); |
278: |
279: // START_MARK_FIRST(); |
280: |
281: ASSUME_ALIGNED(distIndices); |
282: ASSUME_ALIGNED(distArrayCompressed); |
283: int iCount = 0; |
284: int iLimit = iEnd - iStart; |
[...] |
291: for (int jat = 0; jat < iLimit; jat++) |
292: { |
293: real_type r = distArray[jat]; |
294: if (r < cutoff_radius && iStart + jat != iat) |
295: { |
296: distIndices[iCount] = jat; |
297: distArrayCompressed[iCount] = r; |
298: iCount++; |
[...] |
305: real_type r = distArrayCompressed[j]; |
306: int iScatter = distIndices[j]; |
307: real_type rinv = cOne / r; |
308: r *= DeltaRInv; |
309: int iGather = (int)r; |
310: real_type t = r - real_type(iGather); |
311: real_type tp0 = t * t * t; |
312: real_type tp1 = t * t; |
313: real_type tp2 = t; |
314: |
315: real_type sCoef0 = SplineCoefs[iGather + 0]; |
316: real_type sCoef1 = SplineCoefs[iGather + 1]; |
317: real_type sCoef2 = SplineCoefs[iGather + 2]; |
318: real_type sCoef3 = SplineCoefs[iGather + 3]; |
319: |
320: // clang-format off |
321: laplArray[iScatter] = dSquareDeltaRinv * |
322: (sCoef0*( d2A[ 2]*tp2 + d2A[ 3])+ |
323: sCoef1*( d2A[ 6]*tp2 + d2A[ 7])+ |
324: sCoef2*( d2A[10]*tp2 + d2A[11])+ |
325: sCoef3*( d2A[14]*tp2 + d2A[15])); |
326: |
327: gradArray[iScatter] = DeltaRInv * rinv * |
328: (sCoef0*( dA[ 1]*tp1 + dA[ 2]*tp2 + dA[ 3])+ |
329: sCoef1*( dA[ 5]*tp1 + dA[ 6]*tp2 + dA[ 7])+ |
330: sCoef2*( dA[ 9]*tp1 + dA[10]*tp2 + dA[11])+ |
331: sCoef3*( dA[13]*tp1 + dA[14]*tp2 + dA[15])); |
332: |
333: valArray[iScatter] = (sCoef0*(A[ 0]*tp0 + A[ 1]*tp1 + A[ 2]*tp2 + A[ 3])+ |
334: sCoef1*(A[ 4]*tp0 + A[ 5]*tp1 + A[ 6]*tp2 + A[ 7])+ |
335: sCoef2*(A[ 8]*tp0 + A[ 9]*tp1 + A[10]*tp2 + A[11])+ |
336: sCoef3*(A[12]*tp0 + A[13]*tp1 + A[14]*tp2 + A[15])); |
0x3bec0 PUSH %RBP |
0x3bec1 MOV %RDI,%R11 |
0x3bec4 MOV %RSI,%R10 |
0x3bec7 MOV %RSP,%RBP |
0x3beca PUSH %R15 |
0x3becc PUSH %R14 |
0x3bece PUSH %R13 |
0x3bed0 MOV %EDX,%R13D |
0x3bed3 PUSH %R12 |
0x3bed5 MOV %R9,%R12 |
0x3bed8 PUSH %RBX |
0x3bed9 MOV %R8,%RBX |
0x3bedc AND $-0x20,%RSP |
0x3bee0 SUB $0xa0,%RSP |
0x3bee7 CMPB $0,0x18(%RBP) |
0x3beeb MOV 0x10(%RBP),%R14 |
0x3beef MOV %RCX,(%RSP) |
0x3bef3 MOV %EDX,0x18(%RSP) |
0x3bef7 JNE 3bf03 |
0x3bef9 MOV 0x90(%RDI),%EAX |
0x3beff MOV %EAX,0x18(%RSP) |
0x3bf03 MOV 0x18(%RSP),%EDX |
0x3bf07 TEST %EDX,%EDX |
0x3bf09 JLE 3bf55 |
0x3bf0b MOVSXD 0x18(%RSP),%R15 |
0x3bf10 XOR %ESI,%ESI |
0x3bf12 MOV %RBX,%RDI |
0x3bf15 MOV %R10,0x60(%RSP) |
0x3bf1a MOV %R11,0x80(%RSP) |
0x3bf22 SAL $0x3,%R15 |
0x3bf26 MOV %R15,%RDX |
0x3bf29 CALL 80e0 <memset@plt> |
0x3bf2e MOV %R15,%RDX |
0x3bf31 XOR %ESI,%ESI |
0x3bf33 MOV %R12,%RDI |
0x3bf36 CALL 80e0 <memset@plt> |
0x3bf3b MOV %R15,%RDX |
0x3bf3e XOR %ESI,%ESI |
0x3bf40 MOV %R14,%RDI |
0x3bf43 CALL 80e0 <memset@plt> |
0x3bf48 MOV 0x80(%RSP),%R11 |
0x3bf50 MOV 0x60(%RSP),%R10 |
0x3bf55 MOV 0x18(%R10),%RSI |
0x3bf59 MOVSXD %R13D,%RCX |
0x3bf5c MOV 0xa0(%R11),%RDI |
0x3bf63 MOV (%RSI,%RCX,4),%R8D |
0x3bf67 MOV %RDI,0x10(%RSP) |
0x3bf6c IMUL %EDI,%R8D |
0x3bf70 MOVSXD %R8D,%R9 |
0x3bf73 TEST %RDI,%RDI |
0x3bf76 JE 3c514 |
0x3bf7c MOV 0x200(%R11),%RDX |
0x3bf83 MOV 0x268(%R10),%RAX |
0x3bf8a MOV %R13D,0x1c(%RSP) |
0x3bf8f MOV 0x1e8(%R11),%R10 |
0x3bf96 MOV 0x1d0(%R11),%RDI |
0x3bf9d LEA (%RDX,%R9,8),%R11 |
0x3bfa1 MOV 0x18(%RAX),%R15 |
0x3bfa5 VMOVSD 0x24bb3(%RIP),%XMM15 |
0x3bfad XOR %R9D,%R9D |
0x3bfb0 MOV %R11,0x8(%RSP) |
0x3bfb5 NOPL (%RAX) |
(393) 0x3bfb8 MOV 0x8(%RSP),%R13 |
(393) 0x3bfbd MOVSXD (%R15,%R9,4),%R8 |
(393) 0x3bfc1 MOV 0x18(%RSP),%ECX |
(393) 0x3bfc5 MOV (%R13,%R9,8),%RAX |
(393) 0x3bfca INC %R9 |
(393) 0x3bfcd MOV %R8,%R11 |
(393) 0x3bfd0 MOV (%R15,%R9,4),%R13D |
(393) 0x3bfd4 VMOVSD 0x238(%RAX),%XMM7 |
(393) 0x3bfdc CMP %R13D,%ECX |
(393) 0x3bfdf VMULSD %XMM7,%XMM7,%XMM14 |
(393) 0x3bfe3 CMOVLE %ECX,%R13D |
(393) 0x3bfe7 SUB %R8D,%R13D |
(393) 0x3bfea TEST %R13D,%R13D |
(393) 0x3bfed JLE 3c506 |
(393) 0x3bff3 MOV (%RSP),%RDX |
(393) 0x3bff7 MOVSXD %R13D,%RSI |
(393) 0x3bffa VMOVSD 0x8(%RAX),%XMM0 |
(393) 0x3bfff XOR %ECX,%ECX |
(393) 0x3c001 MOV %RSI,0x80(%RSP) |
(393) 0x3c009 LEA (%RDX,%R8,8),%RSI |
(393) 0x3c00d XOR %EDX,%EDX |
(393) 0x3c00f AND $0x7,%R13D |
(393) 0x3c013 JE 3c15b |
(393) 0x3c019 CMP $0x1,%R13 |
(393) 0x3c01d JE 3c124 |
(393) 0x3c023 CMP $0x2,%R13 |
(393) 0x3c027 JE 3c0fc |
(393) 0x3c02d CMP $0x3,%R13 |
(393) 0x3c031 JE 3c0d4 |
(393) 0x3c037 CMP $0x4,%R13 |
(393) 0x3c03b JE 3c0ac |
(393) 0x3c03d CMP $0x5,%R13 |
(393) 0x3c041 JE 3c084 |
(393) 0x3c043 CMP $0x6,%R13 |
(393) 0x3c047 JE 3c05c |
(393) 0x3c049 VMOVSD (%RSI),%XMM1 |
(393) 0x3c04d VCOMISD %XMM1,%XMM0 |
(393) 0x3c051 JA 3c523 |
(393) 0x3c057 MOV $0x1,%EDX |
(393) 0x3c05c VMOVSD (%RSI,%RDX,8),%XMM2 |
(393) 0x3c061 VCOMISD %XMM2,%XMM0 |
(393) 0x3c065 JBE 3c081 |
(393) 0x3c067 LEA (%R11,%RDX,1),%R13D |
(393) 0x3c06b CMP %R13D,0x1c(%RSP) |
(393) 0x3c070 JE 3c081 |
(393) 0x3c072 MOVSXD %ECX,%R13 |
(393) 0x3c075 INC %ECX |
(393) 0x3c077 MOV %EDX,(%R10,%R13,4) |
(393) 0x3c07b VMOVSD %XMM2,(%RDI,%R13,8) |
(393) 0x3c081 INC %RDX |
(393) 0x3c084 VMOVSD (%RSI,%RDX,8),%XMM3 |
(393) 0x3c089 VCOMISD %XMM3,%XMM0 |
(393) 0x3c08d JBE 3c0a9 |
(393) 0x3c08f LEA (%R11,%RDX,1),%R13D |
(393) 0x3c093 CMP %R13D,0x1c(%RSP) |
(393) 0x3c098 JE 3c0a9 |
(393) 0x3c09a MOVSXD %ECX,%R13 |
(393) 0x3c09d INC %ECX |
(393) 0x3c09f MOV %EDX,(%R10,%R13,4) |
(393) 0x3c0a3 VMOVSD %XMM3,(%RDI,%R13,8) |
(393) 0x3c0a9 INC %RDX |
(393) 0x3c0ac VMOVSD (%RSI,%RDX,8),%XMM4 |
(393) 0x3c0b1 VCOMISD %XMM4,%XMM0 |
(393) 0x3c0b5 JBE 3c0d1 |
(393) 0x3c0b7 LEA (%R11,%RDX,1),%R13D |
(393) 0x3c0bb CMP %R13D,0x1c(%RSP) |
(393) 0x3c0c0 JE 3c0d1 |
(393) 0x3c0c2 MOVSXD %ECX,%R13 |
(393) 0x3c0c5 INC %ECX |
(393) 0x3c0c7 MOV %EDX,(%R10,%R13,4) |
(393) 0x3c0cb VMOVSD %XMM4,(%RDI,%R13,8) |
(393) 0x3c0d1 INC %RDX |
(393) 0x3c0d4 VMOVSD (%RSI,%RDX,8),%XMM5 |
(393) 0x3c0d9 VCOMISD %XMM5,%XMM0 |
(393) 0x3c0dd JBE 3c0f9 |
(393) 0x3c0df LEA (%R11,%RDX,1),%R13D |
(393) 0x3c0e3 CMP %R13D,0x1c(%RSP) |
(393) 0x3c0e8 JE 3c0f9 |
(393) 0x3c0ea MOVSXD %ECX,%R13 |
(393) 0x3c0ed INC %ECX |
(393) 0x3c0ef MOV %EDX,(%R10,%R13,4) |
(393) 0x3c0f3 VMOVSD %XMM5,(%RDI,%R13,8) |
(393) 0x3c0f9 INC %RDX |
(393) 0x3c0fc VMOVSD (%RSI,%RDX,8),%XMM6 |
(393) 0x3c101 VCOMISD %XMM6,%XMM0 |
(393) 0x3c105 JBE 3c121 |
(393) 0x3c107 LEA (%R11,%RDX,1),%R13D |
(393) 0x3c10b CMP %R13D,0x1c(%RSP) |
(393) 0x3c110 JE 3c121 |
(393) 0x3c112 MOVSXD %ECX,%R13 |
(393) 0x3c115 INC %ECX |
(393) 0x3c117 MOV %EDX,(%R10,%R13,4) |
(393) 0x3c11b VMOVSD %XMM6,(%RDI,%R13,8) |
(393) 0x3c121 INC %RDX |
(393) 0x3c124 VMOVSD (%RSI,%RDX,8),%XMM8 |
(393) 0x3c129 VCOMISD %XMM8,%XMM0 |
(393) 0x3c12e JBE 3c14a |
(393) 0x3c130 LEA (%R11,%RDX,1),%R13D |
(393) 0x3c134 CMP %R13D,0x1c(%RSP) |
(393) 0x3c139 JE 3c14a |
(393) 0x3c13b MOVSXD %ECX,%R13 |
(393) 0x3c13e INC %ECX |
(393) 0x3c140 MOV %EDX,(%R10,%R13,4) |
(393) 0x3c144 VMOVSD %XMM8,(%RDI,%R13,8) |
(393) 0x3c14a INC %RDX |
(393) 0x3c14d CMP %RDX,0x80(%RSP) |
(393) 0x3c155 JE 3c2bd |
(393) 0x3c15b MOV %RAX,0x60(%RSP) |
(394) 0x3c160 VMOVSD (%RSI,%RDX,8),%XMM9 |
(394) 0x3c165 VCOMISD %XMM9,%XMM0 |
(394) 0x3c16a JBE 3c185 |
(394) 0x3c16c LEA (%R11,%RDX,1),%EAX |
(394) 0x3c170 CMP %EAX,0x1c(%RSP) |
(394) 0x3c174 JE 3c185 |
(394) 0x3c176 MOVSXD %ECX,%R13 |
(394) 0x3c179 INC %ECX |
(394) 0x3c17b MOV %EDX,(%R10,%R13,4) |
(394) 0x3c17f VMOVSD %XMM9,(%RDI,%R13,8) |
(394) 0x3c185 INC %RDX |
(394) 0x3c188 VMOVSD (%RSI,%RDX,8),%XMM10 |
(394) 0x3c18d VCOMISD %XMM10,%XMM0 |
(394) 0x3c192 JBE 3c1ad |
(394) 0x3c194 LEA (%R11,%RDX,1),%EAX |
(394) 0x3c198 CMP %EAX,0x1c(%RSP) |
(394) 0x3c19c JE 3c1ad |
(394) 0x3c19e MOVSXD %ECX,%R13 |
(394) 0x3c1a1 INC %ECX |
(394) 0x3c1a3 MOV %EDX,(%R10,%R13,4) |
(394) 0x3c1a7 VMOVSD %XMM10,(%RDI,%R13,8) |
(394) 0x3c1ad LEA 0x1(%RDX),%RAX |
(394) 0x3c1b1 VMOVSD (%RSI,%RAX,8),%XMM11 |
(394) 0x3c1b6 VCOMISD %XMM11,%XMM0 |
(394) 0x3c1bb JBE 3c1d7 |
(394) 0x3c1bd LEA (%R11,%RAX,1),%R13D |
(394) 0x3c1c1 CMP %R13D,0x1c(%RSP) |
(394) 0x3c1c6 JE 3c1d7 |
(394) 0x3c1c8 MOVSXD %ECX,%R13 |
(394) 0x3c1cb INC %ECX |
(394) 0x3c1cd MOV %EAX,(%R10,%R13,4) |
(394) 0x3c1d1 VMOVSD %XMM11,(%RDI,%R13,8) |
(394) 0x3c1d7 LEA 0x2(%RDX),%RAX |
(394) 0x3c1db VMOVSD (%RSI,%RAX,8),%XMM12 |
(394) 0x3c1e0 VCOMISD %XMM12,%XMM0 |
(394) 0x3c1e5 JBE 3c201 |
(394) 0x3c1e7 LEA (%R11,%RAX,1),%R13D |
(394) 0x3c1eb CMP %R13D,0x1c(%RSP) |
(394) 0x3c1f0 JE 3c201 |
(394) 0x3c1f2 MOVSXD %ECX,%R13 |
(394) 0x3c1f5 INC %ECX |
(394) 0x3c1f7 MOV %EAX,(%R10,%R13,4) |
(394) 0x3c1fb VMOVSD %XMM12,(%RDI,%R13,8) |
(394) 0x3c201 LEA 0x3(%RDX),%RAX |
(394) 0x3c205 VMOVSD (%RSI,%RAX,8),%XMM13 |
(394) 0x3c20a VCOMISD %XMM13,%XMM0 |
(394) 0x3c20f JBE 3c22b |
(394) 0x3c211 LEA (%R11,%RAX,1),%R13D |
(394) 0x3c215 CMP %R13D,0x1c(%RSP) |
(394) 0x3c21a JE 3c22b |
(394) 0x3c21c MOVSXD %ECX,%R13 |
(394) 0x3c21f INC %ECX |
(394) 0x3c221 MOV %EAX,(%R10,%R13,4) |
(394) 0x3c225 VMOVSD %XMM13,(%RDI,%R13,8) |
(394) 0x3c22b LEA 0x4(%RDX),%RAX |
(394) 0x3c22f VMOVSD (%RSI,%RAX,8),%XMM1 |
(394) 0x3c234 VCOMISD %XMM1,%XMM0 |
(394) 0x3c238 JBE 3c254 |
(394) 0x3c23a LEA (%R11,%RAX,1),%R13D |
(394) 0x3c23e CMP %R13D,0x1c(%RSP) |
(394) 0x3c243 JE 3c254 |
(394) 0x3c245 MOVSXD %ECX,%R13 |
(394) 0x3c248 INC %ECX |
(394) 0x3c24a MOV %EAX,(%R10,%R13,4) |
(394) 0x3c24e VMOVSD %XMM1,(%RDI,%R13,8) |
(394) 0x3c254 LEA 0x5(%RDX),%RAX |
(394) 0x3c258 VMOVSD (%RSI,%RAX,8),%XMM2 |
(394) 0x3c25d VCOMISD %XMM2,%XMM0 |
(394) 0x3c261 JBE 3c27d |
(394) 0x3c263 LEA (%R11,%RAX,1),%R13D |
(394) 0x3c267 CMP %R13D,0x1c(%RSP) |
(394) 0x3c26c JE 3c27d |
(394) 0x3c26e MOVSXD %ECX,%R13 |
(394) 0x3c271 INC %ECX |
(394) 0x3c273 MOV %EAX,(%R10,%R13,4) |
(394) 0x3c277 VMOVSD %XMM2,(%RDI,%R13,8) |
(394) 0x3c27d LEA 0x6(%RDX),%RAX |
(394) 0x3c281 VMOVSD (%RSI,%RAX,8),%XMM3 |
(394) 0x3c286 VCOMISD %XMM3,%XMM0 |
(394) 0x3c28a JBE 3c2a6 |
(394) 0x3c28c LEA (%R11,%RAX,1),%R13D |
(394) 0x3c290 CMP %R13D,0x1c(%RSP) |
(394) 0x3c295 JE 3c2a6 |
(394) 0x3c297 MOVSXD %ECX,%R13 |
(394) 0x3c29a INC %ECX |
(394) 0x3c29c MOV %EAX,(%R10,%R13,4) |
(394) 0x3c2a0 VMOVSD %XMM3,(%RDI,%R13,8) |
(394) 0x3c2a6 ADD $0x7,%RDX |
(394) 0x3c2aa CMP %RDX,0x80(%RSP) |
(394) 0x3c2b2 JNE 3c160 |
(393) 0x3c2b8 MOV 0x60(%RSP),%RAX |
(393) 0x3c2bd TEST %ECX,%ECX |
(393) 0x3c2bf JLE 3c506 |
(393) 0x3c2c5 VMOVSD 0x168(%RAX),%XMM0 |
(393) 0x3c2cd VMOVSD 0x128(%RAX),%XMM5 |
(393) 0x3c2d5 MOVSXD %ECX,%RCX |
(393) 0x3c2d8 XOR %EDX,%EDX |
(393) 0x3c2da VMOVSD 0x170(%RAX),%XMM8 |
(393) 0x3c2e2 VMOVSD 0x130(%RAX),%XMM10 |
(393) 0x3c2ea VMOVSD 0xe0(%RAX),%XMM1 |
(393) 0x3c2f2 VMOVSD 0xa0(%RAX),%XMM3 |
(393) 0x3c2fa VMOVHPD 0x188(%RAX),%XMM0,%XMM4 |
(393) 0x3c302 VMOVHPD 0x148(%RAX),%XMM5,%XMM6 |
(393) 0x3c30a VMOVHPD 0x190(%RAX),%XMM8,%XMM9 |
(393) 0x3c312 VMOVHPD 0x150(%RAX),%XMM10,%XMM11 |
(393) 0x3c31a VINSERTF128 $0x1,%XMM4,%YMM6,%YMM13 |
(393) 0x3c320 VMOVSD 0xe8(%RAX),%XMM4 |
(393) 0x3c328 VMOVHPD 0x100(%RAX),%XMM1,%XMM2 |
(393) 0x3c330 VMOVHPD 0xc0(%RAX),%XMM3,%XMM0 |
(393) 0x3c338 VMOVSD 0xa8(%RAX),%XMM6 |
(393) 0x3c340 VINSERTF128 $0x1,%XMM9,%YMM11,%YMM12 |
(393) 0x3c346 VMOVSD 0xf0(%RAX),%XMM9 |
(393) 0x3c34e VINSERTF128 $0x1,%XMM2,%YMM0,%YMM11 |
(393) 0x3c354 VMOVSD 0xb0(%RAX),%XMM2 |
(393) 0x3c35c VMOVHPD 0x108(%RAX),%XMM4,%XMM5 |
(393) 0x3c364 VMOVHPD 0xc8(%RAX),%XMM6,%XMM8 |
(393) 0x3c36c VMOVSD 0x58(%RAX),%XMM0 |
(393) 0x3c371 MOV 0x218(%RAX),%R11 |
(393) 0x3c378 VMOVHPD 0xd0(%RAX),%XMM2,%XMM3 |
(393) 0x3c380 VMOVHPD 0x110(%RAX),%XMM9,%XMM1 |
(393) 0x3c388 VINSERTF128 $0x1,%XMM5,%YMM8,%YMM10 |
(393) 0x3c38e VMOVSD 0x18(%RAX),%XMM5 |
(393) 0x3c393 VINSERTF128 $0x1,%XMM1,%YMM3,%YMM9 |
(393) 0x3c399 VMOVSD 0x60(%RAX),%XMM1 |
(393) 0x3c39e VMOVSD 0x20(%RAX),%XMM3 |
(393) 0x3c3a3 VMOVHPD 0x78(%RAX),%XMM0,%XMM4 |
(393) 0x3c3a8 VMOVHPD 0x38(%RAX),%XMM5,%XMM6 |
(393) 0x3c3ad VMOVSD 0x68(%RAX),%XMM5 |
(393) 0x3c3b2 VMOVHPD 0x80(%RAX),%XMM1,%XMM2 |
(393) 0x3c3ba VMOVHPD 0x40(%RAX),%XMM3,%XMM0 |
(393) 0x3c3bf VINSERTF128 $0x1,%XMM4,%YMM6,%YMM8 |
(393) 0x3c3c5 VINSERTF128 $0x1,%XMM2,%YMM0,%YMM4 |
(393) 0x3c3cb VMOVAPD %YMM8,0x60(%RSP) |
(393) 0x3c3d1 VMOVAPD %YMM4,0x20(%RSP) |
(393) 0x3c3d7 VMOVHPD 0x88(%RAX),%XMM5,%XMM6 |
(393) 0x3c3df VMOVSD 0x28(%RAX),%XMM8 |
(393) 0x3c3e4 VMOVSD 0x70(%RAX),%XMM3 |
(393) 0x3c3e9 VMOVSD 0x30(%RAX),%XMM4 |
(393) 0x3c3ee VMOVHPD 0x48(%RAX),%XMM8,%XMM1 |
(393) 0x3c3f3 VMOVHPD 0x90(%RAX),%XMM3,%XMM0 |
(393) 0x3c3fb VMOVHPD 0x50(%RAX),%XMM4,%XMM5 |
(393) 0x3c400 VINSERTF128 $0x1,%XMM6,%YMM1,%YMM2 |
(393) 0x3c406 VMOVAPD %YMM2,0x80(%RSP) |
(393) 0x3c40f VINSERTF128 $0x1,%XMM0,%YMM5,%YMM6 |
(393) 0x3c415 VMOVAPD %YMM6,0x40(%RSP) |
(393) 0x3c41b NOPL (%RAX,%RAX,1) |
(395) 0x3c420 VMOVSD (%RDI,%RDX,8),%XMM2 |
(395) 0x3c425 VMOVAPD %YMM10,%YMM3 |
(395) 0x3c429 MOVSXD (%R10,%RDX,4),%RAX |
(395) 0x3c42d INC %RDX |
(395) 0x3c430 VMULSD %XMM2,%XMM7,%XMM8 |
(395) 0x3c434 ADD %R8,%RAX |
(395) 0x3c437 VDIVSD %XMM2,%XMM15,%XMM2 |
(395) 0x3c43b VROUNDSD $0xb,%XMM8,%XMM8,%XMM1 |
(395) 0x3c441 VCVTTSD2SI %XMM8,%ESI |
(395) 0x3c446 VSUBSD %XMM1,%XMM8,%XMM0 |
(395) 0x3c44a VMOVAPD 0x80(%RSP),%YMM1 |
(395) 0x3c453 VMULSD %XMM0,%XMM0,%XMM6 |
(395) 0x3c457 VBROADCASTSD %XMM0,%YMM4 |
(395) 0x3c45c VFMADD132PD %YMM4,%YMM9,%YMM3 |
(395) 0x3c461 VFMADD213PD 0x40(%RSP),%YMM4,%YMM1 |
(395) 0x3c468 MOVSXD %ESI,%R13 |
(395) 0x3c46b VFMADD132PD %YMM13,%YMM12,%YMM4 |
(395) 0x3c470 VMOVUPD (%R11,%R13,8),%YMM8 |
(395) 0x3c476 VMULSD %XMM6,%XMM0,%XMM0 |
(395) 0x3c47a VBROADCASTSD %XMM6,%YMM5 |
(395) 0x3c47f VFMADD231PD %YMM5,%YMM11,%YMM3 |
(395) 0x3c484 VMULPD %YMM4,%YMM8,%YMM4 |
(395) 0x3c488 VBROADCASTSD %XMM0,%YMM6 |
(395) 0x3c48d VMULPD 0x60(%RSP),%YMM6,%YMM0 |
(395) 0x3c493 VMULPD %YMM8,%YMM3,%YMM3 |
(395) 0x3c498 VFMADD231PD 0x20(%RSP),%YMM5,%YMM0 |
(395) 0x3c49f VEXTRACTF128 $0x1,%YMM4,%XMM5 |
(395) 0x3c4a5 VADDPD %YMM0,%YMM1,%YMM1 |
(395) 0x3c4a9 VMULPD %YMM8,%YMM1,%YMM6 |
(395) 0x3c4ae VADDPD %XMM4,%XMM5,%XMM8 |
(395) 0x3c4b2 VEXTRACTF128 $0x1,%YMM3,%XMM5 |
(395) 0x3c4b8 VADDPD %XMM3,%XMM5,%XMM3 |
(395) 0x3c4bc VUNPCKHPD %XMM8,%XMM8,%XMM0 |
(395) 0x3c4c1 VADDPD %XMM8,%XMM0,%XMM1 |
(395) 0x3c4c6 VUNPCKHPD %XMM3,%XMM3,%XMM8 |
(395) 0x3c4ca VADDPD %XMM3,%XMM8,%XMM0 |
(395) 0x3c4ce VEXTRACTF128 $0x1,%YMM6,%XMM5 |
(395) 0x3c4d4 VADDPD %XMM6,%XMM5,%XMM6 |
(395) 0x3c4d8 VMULSD %XMM1,%XMM14,%XMM4 |
(395) 0x3c4dc VMULSD %XMM0,%XMM7,%XMM1 |
(395) 0x3c4e0 VUNPCKHPD %XMM6,%XMM6,%XMM3 |
(395) 0x3c4e4 VADDPD %XMM6,%XMM3,%XMM8 |
(395) 0x3c4e8 VMOVSD %XMM4,(%R14,%RAX,8) |
(395) 0x3c4ee VMULSD %XMM1,%XMM2,%XMM4 |
(395) 0x3c4f2 VMOVLPD %XMM8,(%RBX,%RAX,8) |
(395) 0x3c4f7 VMOVSD %XMM4,(%R12,%RAX,8) |
(395) 0x3c4fd CMP %RCX,%RDX |
(395) 0x3c500 JNE 3c420 |
(393) 0x3c506 CMP %R9,0x10(%RSP) |
(393) 0x3c50b JNE 3bfb8 |
0x3c511 VZEROUPPER |
0x3c514 LEA -0x28(%RBP),%RSP |
0x3c518 POP %RBX |
0x3c519 POP %R12 |
0x3c51b POP %R13 |
0x3c51d POP %R14 |
0x3c51f POP %R15 |
0x3c521 POP %RBP |
0x3c522 RET |
(393) 0x3c523 CMP %R8D,0x1c(%RSP) |
(393) 0x3c528 JE 3c057 |
(393) 0x3c52e MOV %EDX,(%R10) |
(393) 0x3c531 MOV $0x1,%ECX |
(393) 0x3c536 VMOVSD %XMM1,(%RDI) |
(393) 0x3c53a JMP 3c057 |
0x3c53f NOP |
Path / |
Source file and lines | TwoBodyJastrowRef.h:254-279 |
Module | libqmcwfs.so |
nb instructions | 72 |
nb uops | 76 |
loop length | 267 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 1 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 10 |
micro-operation queue | 12.67 cycles |
front end | 12.67 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 2.70 | 2.60 | 7.67 | 7.67 | 8.50 | 2.60 | 2.50 | 8.50 | 8.50 | 8.50 | 2.60 | 7.67 |
cycles | 2.70 | 2.67 | 7.67 | 7.67 | 8.50 | 2.60 | 2.50 | 8.50 | 8.50 | 8.50 | 2.60 | 7.67 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 12.19 |
Stall cycles | 0.00 |
Front-end | 12.67 |
Dispatch | 8.50 |
Overall L1 | 12.67 |
all | 5% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 14% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 4% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 14% |
all | 10% |
load | 9% |
store | 10% |
mul | 6% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 10% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 10% |
load | 9% |
store | 10% |
mul | 6% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 10% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RDI,%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RSI,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %EDX,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %R9,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %R8,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x20,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0xa0,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMPB $0,0x18(%RBP) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
MOV 0x10(%RBP),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %EDX,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JNE 3bf03 <_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE9computeU3ERKNS1_11ParticleSetEiPKdPdSA_SA_b+0x43> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x90(%RDI),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %EAX,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x18(%RSP),%EDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
TEST %EDX,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 3bf55 <_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE9computeU3ERKNS1_11ParticleSetEiPKdPdSA_SA_b+0x95> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOVSXD 0x18(%RSP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
XOR %ESI,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RBX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R10,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R11,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SAL $0x3,%R15 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV %R15,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 80e0 <memset@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %R15,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %ESI,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R12,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 80e0 <memset@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %R15,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %ESI,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R14,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 80e0 <memset@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV 0x80(%RSP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x60(%RSP),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%R10),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVSXD %R13D,%RCX | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV 0xa0(%R11),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RSI,%RCX,4),%R8D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RDI,0x10(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
IMUL %EDI,%R8D | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOVSXD %R8D,%R9 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
TEST %RDI,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JE 3c514 <_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE9computeU3ERKNS1_11ParticleSetEiPKdPdSA_SA_b+0x654> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x200(%R11),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x268(%R10),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R13D,0x1c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x1e8(%R11),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x1d0(%R11),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RDX,%R9,8),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x18(%RAX),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x24bb3(%RIP),%XMM15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
XOR %R9D,%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R11,0x8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | TwoBodyJastrowRef.h:254-279 |
Module | libqmcwfs.so |
nb instructions | 72 |
nb uops | 76 |
loop length | 267 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 1 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 10 |
micro-operation queue | 12.67 cycles |
front end | 12.67 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 2.70 | 2.60 | 7.67 | 7.67 | 8.50 | 2.60 | 2.50 | 8.50 | 8.50 | 8.50 | 2.60 | 7.67 |
cycles | 2.70 | 2.67 | 7.67 | 7.67 | 8.50 | 2.60 | 2.50 | 8.50 | 8.50 | 8.50 | 2.60 | 7.67 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 12.19 |
Stall cycles | 0.00 |
Front-end | 12.67 |
Dispatch | 8.50 |
Overall L1 | 12.67 |
all | 5% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 14% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 4% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 14% |
all | 10% |
load | 9% |
store | 10% |
mul | 6% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 10% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 10% |
load | 9% |
store | 10% |
mul | 6% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 10% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RDI,%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RSI,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %EDX,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %R9,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %R8,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x20,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0xa0,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMPB $0,0x18(%RBP) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
MOV 0x10(%RBP),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %EDX,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JNE 3bf03 <_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE9computeU3ERKNS1_11ParticleSetEiPKdPdSA_SA_b+0x43> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x90(%RDI),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %EAX,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x18(%RSP),%EDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
TEST %EDX,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 3bf55 <_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE9computeU3ERKNS1_11ParticleSetEiPKdPdSA_SA_b+0x95> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOVSXD 0x18(%RSP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
XOR %ESI,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RBX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R10,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R11,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SAL $0x3,%R15 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV %R15,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 80e0 <memset@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %R15,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %ESI,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R12,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 80e0 <memset@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %R15,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %ESI,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R14,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 80e0 <memset@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV 0x80(%RSP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x60(%RSP),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%R10),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVSXD %R13D,%RCX | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV 0xa0(%R11),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RSI,%RCX,4),%R8D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RDI,0x10(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
IMUL %EDI,%R8D | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOVSXD %R8D,%R9 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
TEST %RDI,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JE 3c514 <_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE9computeU3ERKNS1_11ParticleSetEiPKdPdSA_SA_b+0x654> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x200(%R11),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x268(%R10),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R13D,0x1c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x1e8(%R11),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x1d0(%R11),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RDX,%R9,8),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x18(%RAX),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x24bb3(%RIP),%XMM15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
XOR %R9D,%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R11,0x8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼_ZN16miniqmcreference17TwoBodyJastrowRefIN11qmcplusplus14BsplineFunctorIdEEE9computeU3ERKNS1_11ParticleSetEiPKdPdSA_SA_b– | 0.56 | 0.63 |
▼Loop 393 - TwoBodyJastrowRef.h:269-271 - libqmcwfs.so– | 0.03 | 0.03 |
○Loop 394 - BsplineFunctor.h:291-298 - libqmcwfs.so | 0.44 | 0.47 |
○Loop 395 - BsplineFunctor.h:305-336 - libqmcwfs.so | 0.08 | 0.09 |