Function: _Z10PdV_kernelbiiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_._omp_fn. ... | Module: exec | Source: PdV.cpp:48-63 [...] | Coverage: 5.42% |
---|
Function: _Z10PdV_kernelbiiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_._omp_fn. ... | Module: exec | Source: PdV.cpp:48-63 [...] | Coverage: 5.42% |
---|
/home/eoseret/qaas_runs_CPU_9468/171-145-9336/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/PdV.cpp: 48 - 63 |
-------------------------------------------------------------------------------- |
48: #pragma omp parallel for simd collapse(2) |
49: for (int j = (y_min + 1); j < (y_max + 2); j++) { |
50: for (int i = (x_min + 1); i < (x_max + 2); i++) { |
51: double left_flux = (xarea(i, j) * (xvel0(i, j) + xvel0(i + 0, j + 1) + xvel0(i, j) + xvel0(i + 0, j + 1))) * 0.25 * dt * 0.5; |
52: double right_flux = |
53: (xarea(i + 1, j + 0) * (xvel0(i + 1, j + 0) + xvel0(i + 1, j + 1) + xvel0(i + 1, j + 0) + xvel0(i + 1, j + 1))) * 0.25 * dt * |
54: 0.5; |
55: double bottom_flux = (yarea(i, j) * (yvel0(i, j) + yvel0(i + 1, j + 0) + yvel0(i, j) + yvel0(i + 1, j + 0))) * 0.25 * dt * 0.5; |
56: double top_flux = (yarea(i + 0, j + 1) * (yvel0(i + 0, j + 1) + yvel0(i + 1, j + 1) + yvel0(i + 0, j + 1) + yvel0(i + 1, j + 1))) * |
57: 0.25 * dt * 0.5; |
58: double total_flux = right_flux - left_flux + top_flux - bottom_flux; |
59: double volume_change_s = volume(i, j) / (volume(i, j) + total_flux); |
60: double recip_volume = 1.0 / volume(i, j); |
61: double energy_change = (pressure(i, j) / density0(i, j) + viscosity(i, j) / density0(i, j)) * total_flux * recip_volume; |
62: energy1(i, j) = energy0(i, j) - energy_change; |
63: density1(i, j) = density0(i, j) * volume_change_s; |
/home/eoseret/qaas_runs_CPU_9468/171-145-9336/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/context.h: 69 - 69 |
-------------------------------------------------------------------------------- |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
0x43ce40 PUSH %RBP |
0x43ce41 MOV %RSP,%RBP |
0x43ce44 PUSH %R15 |
0x43ce46 PUSH %R14 |
0x43ce48 PUSH %R13 |
0x43ce4a PUSH %R12 |
0x43ce4c PUSH %RBX |
0x43ce4d MOV %RDI,%RBX |
0x43ce50 AND $-0x40,%RSP |
0x43ce54 SUB $0x1c0,%RSP |
0x43ce5b MOV 0x68(%RDI),%EAX |
0x43ce5e MOV 0x6c(%RDI),%EDX |
0x43ce61 MOV 0x60(%RDI),%ESI |
0x43ce64 MOV 0x64(%RBX),%ECX |
0x43ce67 ADD $0x2,%EDX |
0x43ce6a LEA 0x1(%RAX),%R15D |
0x43ce6e LEA 0x1(%RSI),%EDI |
0x43ce71 MOV %EDX,0x80(%RSP) |
0x43ce78 MOV %EDI,0x7c(%RSP) |
0x43ce7c CMP %EDX,%R15D |
0x43ce7f JGE 43d96b |
0x43ce85 MOV %EDX,%R13D |
0x43ce88 LEA 0x2(%RCX),%R14D |
0x43ce8c SUB %R15D,%R13D |
0x43ce8f CMP %R14D,%EDI |
0x43ce92 JGE 43d96b |
0x43ce98 MOV %R14D,%R8D |
0x43ce9b SUB %EDI,%R8D |
0x43ce9e MOV %R8D,0x84(%RSP) |
0x43cea6 CALL 4046c0 <omp_get_num_threads@plt> |
0x43ceab MOV %EAX,%R12D |
0x43ceae CALL 4045b0 <omp_get_thread_num@plt> |
0x43ceb3 XOR %EDX,%EDX |
0x43ceb5 MOV %EAX,%R9D |
0x43ceb8 MOV 0x84(%RSP),%EAX |
0x43cebf IMUL %R13D,%EAX |
0x43cec3 DIV %R12D |
0x43cec6 MOV %EAX,%ECX |
0x43cec8 CMP %EDX,%R9D |
0x43cecb JB 43d997 |
0x43ced1 IMUL %ECX,%R9D |
0x43ced5 LEA (%R9,%RDX,1),%EAX |
0x43ced9 LEA (%RCX,%RAX,1),%R10D |
0x43cedd MOV %EAX,0x1b8(%RSP) |
0x43cee4 MOV %R10D,0x78(%RSP) |
0x43cee9 CMP %R10D,%EAX |
0x43ceec JAE 43d96b |
0x43cef2 XOR %EDX,%EDX |
0x43cef4 MOV 0x7c(%RSP),%R11D |
0x43cef9 VMOVSD 0x266cf(%RIP),%XMM2 |
0x43cf01 DIVL 0x84(%RSP) |
0x43cf08 MOV 0x8(%RBX),%RDI |
0x43cf0c MOV 0x10(%RBX),%R13 |
0x43cf10 MOV 0x58(%RBX),%R8 |
0x43cf14 MOV 0x18(%RBX),%R12 |
0x43cf18 MOV 0x40(%RBX),%R9 |
0x43cf1c MOV 0x20(%RBX),%R10 |
0x43cf20 MOV %RDI,0x70(%RSP) |
0x43cf25 VMOVSD 0x266ab(%RIP),%XMM3 |
0x43cf2d MOV %R13,0x60(%RSP) |
0x43cf32 MOV %R8,0x58(%RSP) |
0x43cf37 MOV %R12,0x50(%RSP) |
0x43cf3c MOV %R9,0x48(%RSP) |
0x43cf41 MOV %R10,0x40(%RSP) |
0x43cf46 VMULSD (%RBX),%XMM2,%XMM2 |
0x43cf4a VBROADCASTSD %XMM3,%YMM6 |
0x43cf4f VBROADCASTSD %XMM3,%ZMM4 |
0x43cf55 VBROADCASTSD %XMM2,%YMM1 |
0x43cf5a VBROADCASTSD %XMM2,%ZMM5 |
0x43cf60 LEA (%RDX,%R11,1),%ESI |
0x43cf64 LEA (%RAX,%R15,1),%R15D |
0x43cf68 MOV %R14D,%EAX |
0x43cf6b MOV 0x48(%RBX),%RDX |
0x43cf6f MOV 0x50(%RBX),%R14 |
0x43cf73 MOV 0x30(%RBX),%R11 |
0x43cf77 MOV %ESI,0x148(%RSP) |
0x43cf7e SUB %ESI,%EAX |
0x43cf80 MOV 0x38(%RBX),%RSI |
0x43cf84 MOV 0x28(%RBX),%RBX |
0x43cf88 MOVSXD %R15D,%R15 |
0x43cf8b MOV %RDX,0x38(%RSP) |
0x43cf90 MOV %R14,0x68(%RSP) |
0x43cf95 MOV %R11,0x30(%RSP) |
0x43cf9a MOV %RSI,0x28(%RSP) |
0x43cf9f MOV %RBX,0x20(%RSP) |
0x43cfa4 MOV %R15,0x140(%RSP) |
0x43cfac NOPL (%RAX) |
(223) 0x43cfb0 CMP %EAX,%ECX |
(223) 0x43cfb2 CMOVBE %ECX,%EAX |
(223) 0x43cfb5 MOV 0x1b8(%RSP),%ECX |
(223) 0x43cfbc MOV %EAX,0x14c(%RSP) |
(223) 0x43cfc3 ADD %ECX,%EAX |
(223) 0x43cfc5 MOV %EAX,0x1bc(%RSP) |
(223) 0x43cfcc CMP %EAX,%ECX |
(223) 0x43cfce JAE 43d91e |
(223) 0x43cfd4 MOV 0x68(%RSP),%R14 |
(223) 0x43cfd9 MOV 0x140(%RSP),%RDI |
(223) 0x43cfe1 MOV 0x70(%RSP),%RAX |
(223) 0x43cfe6 MOV 0x58(%RSP),%R11 |
(223) 0x43cfeb MOV (%R14),%R8 |
(223) 0x43cfee MOV %RDI,%RCX |
(223) 0x43cff1 MOV 0x60(%RSP),%R9 |
(223) 0x43cff6 MOV %RDI,%RBX |
(223) 0x43cff9 MOV 0x10(%RAX),%R13 |
(223) 0x43cffd MOV (%RAX),%RSI |
(223) 0x43d000 IMUL %R8,%RCX |
(223) 0x43d004 MOV (%R11),%RAX |
(223) 0x43d007 MOV 0x10(%R11),%R15 |
(223) 0x43d00b MOV %RDI,%R11 |
(223) 0x43d00e MOV (%R9),%RDX |
(223) 0x43d011 MOV 0x10(%R9),%R10 |
(223) 0x43d015 IMUL %RDI,%RSI |
(223) 0x43d019 MOV %R13,0x90(%RSP) |
(223) 0x43d021 IMUL %RAX,%R11 |
(223) 0x43d025 MOV %R15,0x1b0(%RSP) |
(223) 0x43d02d MOV 0x48(%RSP),%R15 |
(223) 0x43d032 ADD %RCX,%R8 |
(223) 0x43d035 IMUL %RDX,%RBX |
(223) 0x43d039 MOV 0x10(%R14),%R12 |
(223) 0x43d03d MOV %R10,0xc0(%RSP) |
(223) 0x43d045 MOV %R8,0xb8(%RSP) |
(223) 0x43d04d MOV 0x50(%RSP),%R8 |
(223) 0x43d052 LEA (%RAX,%R11,1),%R14 |
(223) 0x43d056 MOV %RDI,%RAX |
(223) 0x43d059 MOV 0x10(%R15),%R10 |
(223) 0x43d05d MOV %RSI,0x88(%RSP) |
(223) 0x43d065 IMUL (%R8),%RDI |
(223) 0x43d069 MOV 0x10(%R8),%R9 |
(223) 0x43d06d ADD %RBX,%RDX |
(223) 0x43d070 MOV %R14,0xd0(%RSP) |
(223) 0x43d078 MOV %R10,0xe8(%RSP) |
(223) 0x43d080 MOV %R9,0xd8(%RSP) |
(223) 0x43d088 MOV %R12,0x1a8(%RSP) |
(223) 0x43d090 MOV %RCX,0x98(%RSP) |
(223) 0x43d098 MOV %RBX,0xa0(%RSP) |
(223) 0x43d0a0 MOV %R11,0xa8(%RSP) |
(223) 0x43d0a8 MOV %RDX,0xc8(%RSP) |
(223) 0x43d0b0 MOV %RDI,0xb0(%RSP) |
(223) 0x43d0b8 MOV (%R15),%RDX |
(223) 0x43d0bb MOV 0x40(%RSP),%R9 |
(223) 0x43d0c0 MOV 0x38(%RSP),%R15 |
(223) 0x43d0c5 IMUL %RAX,%RDX |
(223) 0x43d0c9 MOV (%R9),%R8 |
(223) 0x43d0cc MOV 0x10(%R9),%R14 |
(223) 0x43d0d0 MOV 0x10(%R15),%R10 |
(223) 0x43d0d4 IMUL %RAX,%R8 |
(223) 0x43d0d8 MOV %R14,0xf8(%RSP) |
(223) 0x43d0e0 MOV 0x30(%RSP),%R14 |
(223) 0x43d0e5 MOV %RDX,0xe0(%RSP) |
(223) 0x43d0ed MOV (%R15),%RDX |
(223) 0x43d0f0 MOV 0x28(%RSP),%R15 |
(223) 0x43d0f5 MOV 0x10(%R14),%R9 |
(223) 0x43d0f9 MOV %R10,0x138(%RSP) |
(223) 0x43d101 MOV %R8,0xf0(%RSP) |
(223) 0x43d109 MOV (%R14),%R8 |
(223) 0x43d10c IMUL %RAX,%RDX |
(223) 0x43d110 MOV 0x10(%R15),%R10 |
(223) 0x43d114 MOV (%R15),%R14 |
(223) 0x43d117 MOV %R9,0x128(%RSP) |
(223) 0x43d11f IMUL %RAX,%R8 |
(223) 0x43d123 MOV 0x20(%RSP),%R15 |
(223) 0x43d128 IMUL %RAX,%R14 |
(223) 0x43d12c MOV %R10,0x118(%RSP) |
(223) 0x43d134 MOV 0x10(%R15),%R9 |
(223) 0x43d138 MOV %RDX,0x100(%RSP) |
(223) 0x43d140 MOV %R8,0x130(%RSP) |
(223) 0x43d148 MOV (%R15),%R8 |
(223) 0x43d14b MOV %R14,0x120(%RSP) |
(223) 0x43d153 IMUL %RAX,%R8 |
(223) 0x43d157 MOV 0x14c(%RSP),%EAX |
(223) 0x43d15e MOV %R9,0x108(%RSP) |
(223) 0x43d166 LEA -0x1(%RAX),%R10D |
(223) 0x43d16a MOV %R8,0x110(%RSP) |
(223) 0x43d172 CMP $0x6,%R10D |
(223) 0x43d176 JBE 43d980 |
(223) 0x43d17c MOVSXD 0x148(%RSP),%RAX |
(223) 0x43d184 MOV 0xb8(%RSP),%R9 |
(223) 0x43d18c MOV %R12,%R8 |
(223) 0x43d18f LEA (%R9,%RAX,1),%R10 |
(223) 0x43d193 ADD %RAX,%RSI |
(223) 0x43d196 LEA (%R11,%RAX,1),%R9 |
(223) 0x43d19a ADD %RAX,%RCX |
(223) 0x43d19d SAL $0x3,%RSI |
(223) 0x43d1a1 SAL $0x3,%R10 |
(223) 0x43d1a5 LEA (%R13,%RSI,1),%R15 |
(223) 0x43d1aa LEA 0x8(%R13,%RSI,1),%R13 |
(223) 0x43d1af SAL $0x3,%R9 |
(223) 0x43d1b3 SAL $0x3,%RCX |
(223) 0x43d1b7 LEA 0x8(%R8,%R10,1),%RSI |
(223) 0x43d1bc LEA (%R12,%R10,1),%RDX |
(223) 0x43d1c0 MOV 0xc8(%RSP),%R8 |
(223) 0x43d1c8 MOV 0xc0(%RSP),%R10 |
(223) 0x43d1d0 MOV %RSI,0x180(%RSP) |
(223) 0x43d1d8 MOV 0x1b0(%RSP),%RSI |
(223) 0x43d1e0 LEA (%R12,%RCX,1),%R14 |
(223) 0x43d1e4 LEA 0x8(%R12,%RCX,1),%R12 |
(223) 0x43d1e9 MOV %RDX,0x158(%RSP) |
(223) 0x43d1f1 LEA (%RBX,%RAX,1),%RCX |
(223) 0x43d1f5 LEA 0x8(%RSI,%R9,1),%RDX |
(223) 0x43d1fa LEA (%RSI,%R9,1),%R11 |
(223) 0x43d1fe MOV 0xd0(%RSP),%R9 |
(223) 0x43d206 MOV %RDX,0x188(%RSP) |
(223) 0x43d20e LEA (%R10,%RCX,8),%RBX |
(223) 0x43d212 LEA (%R8,%RAX,1),%RCX |
(223) 0x43d216 LEA (%R9,%RAX,1),%RDX |
(223) 0x43d21a LEA (%R10,%RCX,8),%R10 |
(223) 0x43d21e MOV 0xe0(%RSP),%RCX |
(223) 0x43d226 SAL $0x3,%RDX |
(223) 0x43d22a LEA (%RDI,%RAX,1),%R8 |
(223) 0x43d22e MOV 0xd8(%RSP),%RDI |
(223) 0x43d236 LEA (%RSI,%RDX,1),%R9 |
(223) 0x43d23a LEA 0x8(%RSI,%RDX,1),%RSI |
(223) 0x43d23f MOV 0xe8(%RSP),%RDX |
(223) 0x43d247 ADD %RAX,%RCX |
(223) 0x43d24a MOV %RSI,0x190(%RSP) |
(223) 0x43d252 MOV 0xf0(%RSP),%RSI |
(223) 0x43d25a LEA (%RDI,%R8,8),%R8 |
(223) 0x43d25e LEA (%RDX,%RCX,8),%RDI |
(223) 0x43d262 MOV 0xf8(%RSP),%RCX |
(223) 0x43d26a LEA (%RSI,%RAX,1),%RDX |
(223) 0x43d26e MOV 0x100(%RSP),%RSI |
(223) 0x43d276 LEA (%RCX,%RDX,8),%RDX |
(223) 0x43d27a LEA (%RSI,%RAX,1),%RCX |
(223) 0x43d27e MOV 0x138(%RSP),%RSI |
(223) 0x43d286 LEA (%RSI,%RCX,8),%RCX |
(223) 0x43d28a MOV 0x130(%RSP),%RSI |
(223) 0x43d292 MOV %RCX,0x160(%RSP) |
(223) 0x43d29a LEA (%RSI,%RAX,1),%RCX |
(223) 0x43d29e MOV 0x128(%RSP),%RSI |
(223) 0x43d2a6 LEA (%RSI,%RCX,8),%RCX |
(223) 0x43d2aa MOV 0x120(%RSP),%RSI |
(223) 0x43d2b2 MOV %RCX,0x198(%RSP) |
(223) 0x43d2ba LEA (%RSI,%RAX,1),%RCX |
(223) 0x43d2be MOV 0x118(%RSP),%RSI |
(223) 0x43d2c6 LEA (%RSI,%RCX,8),%RCX |
(223) 0x43d2ca MOV 0x110(%RSP),%RSI |
(223) 0x43d2d2 MOV %RCX,0x1a0(%RSP) |
(223) 0x43d2da MOV 0x108(%RSP),%RCX |
(223) 0x43d2e2 ADD %RSI,%RAX |
(223) 0x43d2e5 LEA (%RCX,%RAX,8),%RSI |
(223) 0x43d2e9 MOV 0x14c(%RSP),%ECX |
(223) 0x43d2f0 XOR %EAX,%EAX |
(223) 0x43d2f2 SHR $0x3,%ECX |
(223) 0x43d2f5 SAL $0x6,%RCX |
(223) 0x43d2f9 MOV %RCX,0x150(%RSP) |
(223) 0x43d301 NOPL (%RAX) |
(225) 0x43d308 MOV 0x158(%RSP),%RCX |
(225) 0x43d310 VMOVUPD (%RBX,%RAX,1),%ZMM9 |
(225) 0x43d317 VMOVUPD (%R13,%RAX,1),%ZMM12 |
(225) 0x43d31f VMOVUPD (%RCX,%RAX,1),%ZMM7 |
(225) 0x43d326 MOV 0x188(%RSP),%RCX |
(225) 0x43d32e VADDPD %ZMM9,%ZMM9,%ZMM11 |
(225) 0x43d334 VMOVUPD (%R10,%RAX,1),%ZMM9 |
(225) 0x43d33b VMOVUPD (%RCX,%RAX,1),%ZMM0 |
(225) 0x43d342 MOV 0x180(%RSP),%RCX |
(225) 0x43d34a VADDPD (%R14,%RAX,1),%ZMM7,%ZMM10 |
(225) 0x43d351 VADDPD %ZMM12,%ZMM12,%ZMM7 |
(225) 0x43d357 VMOVUPD (%RCX,%RAX,1),%ZMM13 |
(225) 0x43d35e MOV 0x190(%RSP),%RCX |
(225) 0x43d366 VADDPD (%R11,%RAX,1),%ZMM0,%ZMM8 |
(225) 0x43d36d VMOVUPD (%RCX,%RAX,1),%ZMM0 |
(225) 0x43d374 VADDPD (%R12,%RAX,1),%ZMM13,%ZMM15 |
(225) 0x43d37b VMOVUPD (%R15,%RAX,1),%ZMM13 |
(225) 0x43d382 MOV 0x160(%RSP),%RCX |
(225) 0x43d38a VMULPD %ZMM11,%ZMM8,%ZMM14 |
(225) 0x43d390 VADDPD (%R9,%RAX,1),%ZMM0,%ZMM8 |
(225) 0x43d397 VADDPD %ZMM9,%ZMM9,%ZMM11 |
(225) 0x43d39d VMOVUPD (%RCX,%RAX,1),%ZMM9 |
(225) 0x43d3a4 MOV 0x198(%RSP),%RCX |
(225) 0x43d3ac VMULPD %ZMM11,%ZMM8,%ZMM12 |
(225) 0x43d3b2 VADDPD (%RDI,%RAX,1),%ZMM9,%ZMM11 |
(225) 0x43d3b9 VMOVUPD (%R8,%RAX,1),%ZMM8 |
(225) 0x43d3c0 VFMADD132PD %ZMM15,%ZMM12,%ZMM7 |
(225) 0x43d3c6 VADDPD %ZMM13,%ZMM13,%ZMM15 |
(225) 0x43d3cc VDIVPD (%RDX,%RAX,1),%ZMM11,%ZMM12 |
(225) 0x43d3d3 VDIVPD %ZMM8,%ZMM4,%ZMM13 |
(225) 0x43d3d9 VFMADD231PD %ZMM10,%ZMM15,%ZMM14 |
(225) 0x43d3df VMULPD %ZMM13,%ZMM12,%ZMM15 |
(225) 0x43d3e5 VSUBPD %ZMM14,%ZMM7,%ZMM10 |
(225) 0x43d3eb VMULPD %ZMM5,%ZMM10,%ZMM0 |
(225) 0x43d3f1 VADDPD %ZMM0,%ZMM8,%ZMM14 |
(225) 0x43d3f7 VFNMADD213PD (%RCX,%RAX,1),%ZMM15,%ZMM0 |
(225) 0x43d3fe MOV 0x1a0(%RSP),%RCX |
(225) 0x43d406 VDIVPD %ZMM14,%ZMM8,%ZMM7 |
(225) 0x43d40c VMOVUPD %ZMM0,(%RCX,%RAX,1) |
(225) 0x43d413 MOV 0x150(%RSP),%RCX |
(225) 0x43d41b VMULPD (%RDX,%RAX,1),%ZMM7,%ZMM10 |
(225) 0x43d422 VMOVUPD %ZMM10,(%RSI,%RAX,1) |
(225) 0x43d429 ADD $0x40,%RAX |
(225) 0x43d42d CMP %RCX,%RAX |
(225) 0x43d430 JNE 43d308 |
(223) 0x43d436 MOV 0x14c(%RSP),%R15D |
(223) 0x43d43e MOV 0x148(%RSP),%R14D |
(223) 0x43d446 MOV %R15D,%EDX |
(223) 0x43d449 AND $-0x8,%EDX |
(223) 0x43d44c ADD %EDX,%R14D |
(223) 0x43d44f ADD %EDX,0x1b8(%RSP) |
(223) 0x43d456 MOV %R14D,0x190(%RSP) |
(223) 0x43d45e TEST $0x7,%R15B |
(223) 0x43d462 JE 43d90e |
(223) 0x43d468 MOV 0x14c(%RSP),%R13D |
(223) 0x43d470 SUB %EDX,%R13D |
(223) 0x43d473 MOV %R13D,0x188(%RSP) |
(223) 0x43d47b DEC %R13D |
(223) 0x43d47e CMP $0x2,%R13D |
(223) 0x43d482 JBE 43d6d9 |
(223) 0x43d488 MOVSXD 0x148(%RSP),%RAX |
(223) 0x43d490 MOV 0xa8(%RSP),%R11 |
(223) 0x43d498 MOV 0xd0(%RSP),%R10 |
(223) 0x43d4a0 MOV 0xb0(%RSP),%R15 |
(223) 0x43d4a8 LEA (%R11,%RAX,1),%R9 |
(223) 0x43d4ac MOV 0xf0(%RSP),%R11 |
(223) 0x43d4b4 MOV 0x88(%RSP),%R12 |
(223) 0x43d4bc MOV 0xe0(%RSP),%R14 |
(223) 0x43d4c4 LEA (%R10,%RAX,1),%RSI |
(223) 0x43d4c8 LEA (%R15,%RAX,1),%R13 |
(223) 0x43d4cc MOV 0xf8(%RSP),%R15 |
(223) 0x43d4d4 LEA (%R11,%RAX,1),%R10 |
(223) 0x43d4d8 MOV 0xb8(%RSP),%RBX |
(223) 0x43d4e0 LEA (%R12,%RAX,1),%RCX |
(223) 0x43d4e4 MOV 0x98(%RSP),%RDI |
(223) 0x43d4ec ADD %RDX,%R10 |
(223) 0x43d4ef LEA (%R14,%RAX,1),%R12 |
(223) 0x43d4f3 MOV 0x100(%RSP),%R14 |
(223) 0x43d4fb ADD %RDX,%R9 |
(223) 0x43d4fe LEA (%R15,%R10,8),%R15 |
(223) 0x43d502 MOV 0x110(%RSP),%R10 |
(223) 0x43d50a LEA (%RBX,%RAX,1),%R8 |
(223) 0x43d50e ADD %RAX,%RDI |
(223) 0x43d511 LEA (%R12,%RDX,1),%RBX |
(223) 0x43d515 LEA (%R14,%RAX,1),%R12 |
(223) 0x43d519 ADD %RDX,%R8 |
(223) 0x43d51c ADD %RDX,%RDI |
(223) 0x43d51f LEA (%R10,%RAX,1),%R14 |
(223) 0x43d523 MOV %RBX,0x1a0(%RSP) |
(223) 0x43d52b MOV 0x120(%RSP),%R11 |
(223) 0x43d533 ADD %RDX,%RCX |
(223) 0x43d536 LEA (%R14,%RDX,1),%R10 |
(223) 0x43d53a MOV 0x1a8(%RSP),%R14 |
(223) 0x43d542 MOV 0x130(%RSP),%RBX |
(223) 0x43d54a ADD %RDX,%RSI |
(223) 0x43d54d MOV %R10,0x198(%RSP) |
(223) 0x43d555 MOV 0xa0(%RSP),%R10 |
(223) 0x43d55d ADD %RAX,%R11 |
(223) 0x43d560 ADD %RDX,%R13 |
(223) 0x43d563 VMOVUPD (%R14,%R8,8),%YMM0 |
(223) 0x43d569 LEA (%RBX,%RAX,1),%RBX |
(223) 0x43d56d ADD %RDX,%R12 |
(223) 0x43d570 ADD %RDX,%R11 |
(223) 0x43d573 ADD %RDX,%RBX |
(223) 0x43d576 VADDPD (%R14,%RDI,8),%YMM0,%YMM14 |
(223) 0x43d57c LEA (%R10,%RAX,1),%R14 |
(223) 0x43d580 MOV 0xc0(%RSP),%R10 |
(223) 0x43d588 ADD %RDX,%R14 |
(223) 0x43d58b VMOVUPD (%R10,%R14,8),%YMM8 |
(223) 0x43d591 MOV 0x1b0(%RSP),%R14 |
(223) 0x43d599 VMOVUPD (%R14,%R9,8),%YMM7 |
(223) 0x43d59f VADDPD %YMM8,%YMM8,%YMM9 |
(223) 0x43d5a4 VADDPD 0x8(%R14,%R9,8),%YMM7,%YMM11 |
(223) 0x43d5ab MOV 0x1a8(%RSP),%R14 |
(223) 0x43d5b3 MOV 0x90(%RSP),%R9 |
(223) 0x43d5bb VMOVUPD 0x8(%R14,%R8,8),%YMM15 |
(223) 0x43d5c2 VMOVUPD 0x8(%R9,%RCX,8),%YMM13 |
(223) 0x43d5c9 VMULPD %YMM11,%YMM9,%YMM12 |
(223) 0x43d5ce VADDPD 0x8(%R14,%RDI,8),%YMM15,%YMM10 |
(223) 0x43d5d5 MOV 0xc8(%RSP),%RDI |
(223) 0x43d5dd VADDPD %YMM13,%YMM13,%YMM0 |
(223) 0x43d5e2 ADD %RDI,%RAX |
(223) 0x43d5e5 ADD %RDX,%RAX |
(223) 0x43d5e8 VMOVUPD (%R10,%RAX,8),%YMM8 |
(223) 0x43d5ee MOV 0x1b0(%RSP),%RDX |
(223) 0x43d5f6 VMOVUPD (%R9,%RCX,8),%YMM15 |
(223) 0x43d5fc MOV 0xd8(%RSP),%RAX |
(223) 0x43d604 VMOVUPD (%RDX,%RSI,8),%YMM7 |
(223) 0x43d609 VADDPD %YMM8,%YMM8,%YMM9 |
(223) 0x43d60e MOV 0x138(%RSP),%RCX |
(223) 0x43d616 MOV 0xe8(%RSP),%R8 |
(223) 0x43d61e VADDPD 0x8(%RDX,%RSI,8),%YMM7,%YMM11 |
(223) 0x43d624 MOV 0x1a0(%RSP),%RSI |
(223) 0x43d62c VMOVAPD %YMM7,0x160(%RSP) |
(223) 0x43d635 VMULPD %YMM11,%YMM9,%YMM13 |
(223) 0x43d63a VMOVUPD (%RCX,%R12,8),%YMM9 |
(223) 0x43d640 MOV 0x118(%RSP),%R12 |
(223) 0x43d648 VADDPD (%R8,%RSI,8),%YMM9,%YMM7 |
(223) 0x43d64e VFMADD132PD %YMM10,%YMM13,%YMM0 |
(223) 0x43d653 VADDPD %YMM15,%YMM15,%YMM10 |
(223) 0x43d658 VDIVPD (%R15),%YMM7,%YMM13 |
(223) 0x43d65d VFMADD132PD %YMM14,%YMM12,%YMM10 |
(223) 0x43d662 VSUBPD %YMM10,%YMM0,%YMM14 |
(223) 0x43d667 VMOVUPD (%RAX,%R13,8),%YMM0 |
(223) 0x43d66d MOV 0x128(%RSP),%R13 |
(223) 0x43d675 VDIVPD %YMM0,%YMM6,%YMM15 |
(223) 0x43d679 VMULPD %YMM1,%YMM14,%YMM12 |
(223) 0x43d67d VADDPD %YMM0,%YMM12,%YMM8 |
(223) 0x43d681 VDIVPD %YMM8,%YMM0,%YMM11 |
(223) 0x43d686 VMULPD %YMM15,%YMM13,%YMM10 |
(223) 0x43d68b VFNMADD213PD (%R13,%RBX,8),%YMM10,%YMM12 |
(223) 0x43d692 MOV 0x198(%RSP),%RBX |
(223) 0x43d69a VMOVUPD %YMM12,(%R12,%R11,8) |
(223) 0x43d6a0 MOV 0x188(%RSP),%R11D |
(223) 0x43d6a8 VMULPD (%R15),%YMM11,%YMM14 |
(223) 0x43d6ad MOV 0x108(%RSP),%R15 |
(223) 0x43d6b5 VMOVUPD %YMM14,(%R15,%RBX,8) |
(223) 0x43d6bb TEST $0x3,%R11B |
(223) 0x43d6bf JE 43d90e |
(223) 0x43d6c5 AND $-0x4,%R11D |
(223) 0x43d6c9 ADD %R11D,0x1b8(%RSP) |
(223) 0x43d6d1 ADD %R11D,0x190(%RSP) |
(223) 0x43d6d9 MOV 0x1a8(%RSP),%RDX |
(223) 0x43d6e1 MOVSXD 0x190(%RSP),%R9 |
(223) 0x43d6e9 MOV 0x90(%RSP),%R14 |
(223) 0x43d6f1 MOV 0x88(%RSP),%RDI |
(223) 0x43d6f9 MOV 0x98(%RSP),%RAX |
(223) 0x43d701 MOV 0xb8(%RSP),%RCX |
(223) 0x43d709 MOV %R9,0x198(%RSP) |
(223) 0x43d711 MOV %R9,%R10 |
(223) 0x43d714 MOV 0x1b0(%RSP),%R12 |
(223) 0x43d71c MOV 0xc0(%RSP),%R13 |
(223) 0x43d724 LEA (%R14,%RDI,8),%R9 |
(223) 0x43d728 MOV 0xa0(%RSP),%RSI |
(223) 0x43d730 MOV 0xa8(%RSP),%RBX |
(223) 0x43d738 LEA (%RDX,%RAX,8),%R8 |
(223) 0x43d73c LEA (%RDX,%RCX,8),%RDI |
(223) 0x43d740 MOV 0xc8(%RSP),%R11 |
(223) 0x43d748 MOV 0xd0(%RSP),%RDX |
(223) 0x43d750 MOV 0xd8(%RSP),%RAX |
(223) 0x43d758 LEA (%R13,%RSI,8),%R15 |
(223) 0x43d75d LEA (%R12,%RBX,8),%RSI |
(223) 0x43d761 MOV 0xe0(%RSP),%RBX |
(223) 0x43d769 LEA (%R13,%R11,8),%R14 |
(223) 0x43d76e LEA (%R12,%RDX,8),%RCX |
(223) 0x43d772 MOV 0xb0(%RSP),%R13 |
(223) 0x43d77a MOV 0xe8(%RSP),%R12 |
(223) 0x43d782 MOV 0xf8(%RSP),%R11 |
(223) 0x43d78a MOV 0xf0(%RSP),%RDX |
(223) 0x43d792 LEA (%RAX,%R13,8),%R13 |
(223) 0x43d796 LEA (%R12,%RBX,8),%R12 |
(223) 0x43d79a MOV 0x138(%RSP),%RAX |
(223) 0x43d7a2 MOV 0x100(%RSP),%RBX |
(223) 0x43d7aa LEA (%R11,%RDX,8),%RDX |
(223) 0x43d7ae LEA (%RAX,%RBX,8),%R11 |
(223) 0x43d7b2 MOV 0x128(%RSP),%RAX |
(223) 0x43d7ba MOV 0x130(%RSP),%RBX |
(223) 0x43d7c2 MOV %R11,0x1b0(%RSP) |
(223) 0x43d7ca LEA (%RAX,%RBX,8),%R11 |
(223) 0x43d7ce MOV 0x118(%RSP),%RAX |
(223) 0x43d7d6 MOV 0x120(%RSP),%RBX |
(223) 0x43d7de MOV %R11,0x1a8(%RSP) |
(223) 0x43d7e6 LEA (%RAX,%RBX,8),%R11 |
(223) 0x43d7ea MOV 0x108(%RSP),%RAX |
(223) 0x43d7f2 MOV 0x110(%RSP),%RBX |
(223) 0x43d7fa LEA (%RAX,%RBX,8),%RBX |
(223) 0x43d7fe MOV 0x1b8(%RSP),%EAX |
(223) 0x43d805 SUB %R10D,%EAX |
(223) 0x43d808 MOV %EAX,0x1a0(%RSP) |
(223) 0x43d80f MOV 0x198(%RSP),%RAX |
(223) 0x43d817 MOV %R11,0x198(%RSP) |
(223) 0x43d81f NOP |
(224) 0x43d820 VMOVSD 0x8(%RSI,%RAX,8),%XMM0 |
(224) 0x43d826 VMOVSD (%R15,%RAX,8),%XMM9 |
(224) 0x43d82c VMOVSD 0x8(%R9,%RAX,8),%XMM15 |
(224) 0x43d833 VMOVSD (%RDI,%RAX,8),%XMM12 |
(224) 0x43d838 VADDSD (%RSI,%RAX,8),%XMM0,%XMM8 |
(224) 0x43d83d VADDSD %XMM9,%XMM9,%XMM7 |
(224) 0x43d842 VMOVSD 0x8(%RCX,%RAX,8),%XMM0 |
(224) 0x43d848 VMOVSD (%R14,%RAX,8),%XMM9 |
(224) 0x43d84e VMOVSD 0x8(%RDI,%RAX,8),%XMM14 |
(224) 0x43d854 VADDSD %XMM15,%XMM15,%XMM10 |
(224) 0x43d859 VADDSD (%R8,%RAX,8),%XMM12,%XMM11 |
(224) 0x43d85f MOV 0x1b0(%RSP),%R10 |
(224) 0x43d867 VMULSD %XMM7,%XMM8,%XMM13 |
(224) 0x43d86b VADDSD (%RCX,%RAX,8),%XMM0,%XMM8 |
(224) 0x43d870 VADDSD %XMM9,%XMM9,%XMM7 |
(224) 0x43d875 MOV 0x1a8(%RSP),%R11 |
(224) 0x43d87d VADDSD 0x8(%R8,%RAX,8),%XMM14,%XMM12 |
(224) 0x43d884 VMOVSD (%R9,%RAX,8),%XMM14 |
(224) 0x43d88a VMULSD %XMM7,%XMM8,%XMM15 |
(224) 0x43d88e VMOVSD (%R12,%RAX,8),%XMM8 |
(224) 0x43d894 VADDSD (%R10,%RAX,8),%XMM8,%XMM7 |
(224) 0x43d89a MOV 0x198(%RSP),%R10 |
(224) 0x43d8a2 VFMADD132SD %XMM12,%XMM15,%XMM10 |
(224) 0x43d8a7 VADDSD %XMM14,%XMM14,%XMM12 |
(224) 0x43d8ac VDIVSD (%RDX,%RAX,8),%XMM7,%XMM15 |
(224) 0x43d8b1 VFMADD132SD %XMM11,%XMM13,%XMM12 |
(224) 0x43d8b6 VSUBSD %XMM12,%XMM10,%XMM11 |
(224) 0x43d8bb VMOVSD (%R13,%RAX,8),%XMM10 |
(224) 0x43d8c2 VDIVSD %XMM10,%XMM3,%XMM14 |
(224) 0x43d8c7 VMULSD %XMM2,%XMM11,%XMM13 |
(224) 0x43d8cb VADDSD %XMM13,%XMM10,%XMM0 |
(224) 0x43d8d0 VDIVSD %XMM0,%XMM10,%XMM9 |
(224) 0x43d8d4 VMULSD %XMM14,%XMM15,%XMM12 |
(224) 0x43d8d9 VFNMADD213SD (%R11,%RAX,8),%XMM12,%XMM13 |
(224) 0x43d8df MOV 0x1bc(%RSP),%R11D |
(224) 0x43d8e7 VMOVSD %XMM13,(%R10,%RAX,8) |
(224) 0x43d8ed MOV 0x1a0(%RSP),%R10D |
(224) 0x43d8f5 VMULSD (%RDX,%RAX,8),%XMM9,%XMM11 |
(224) 0x43d8fa VMOVSD %XMM11,(%RBX,%RAX,8) |
(224) 0x43d8ff INC %RAX |
(224) 0x43d902 ADD %EAX,%R10D |
(224) 0x43d905 CMP %R11D,%R10D |
(224) 0x43d908 JB 43d820 |
(223) 0x43d90e MOV 0x1bc(%RSP),%R9D |
(223) 0x43d916 MOV %R9D,0x1b8(%RSP) |
(223) 0x43d91e INCQ 0x140(%RSP) |
(223) 0x43d926 MOV 0x140(%RSP),%R8 |
(223) 0x43d92e ADD $0,%R8D |
(223) 0x43d932 CMP %R8D,0x80(%RSP) |
(223) 0x43d93a JLE 43d968 |
(223) 0x43d93c MOV 0x78(%RSP),%ECX |
(223) 0x43d940 MOV 0x1b8(%RSP),%EDI |
(223) 0x43d947 MOV 0x7c(%RSP),%R15D |
(223) 0x43d94c MOV 0x84(%RSP),%EAX |
(223) 0x43d953 SUB %EDI,%ECX |
(223) 0x43d955 MOV %R15D,0x148(%RSP) |
(223) 0x43d95d JMP 43cfb0 |
0x43d962 NOPW (%RAX,%RAX,1) |
0x43d968 VZEROUPPER |
0x43d96b LEA -0x28(%RBP),%RSP |
0x43d96f POP %RBX |
0x43d970 POP %R12 |
0x43d972 POP %R13 |
0x43d974 POP %R14 |
0x43d976 POP %R15 |
0x43d978 POP %RBP |
0x43d979 RET |
0x43d97a NOPW (%RAX,%RAX,1) |
(223) 0x43d980 MOV 0x148(%RSP),%R13D |
(223) 0x43d988 XOR %EDX,%EDX |
(223) 0x43d98a MOV %R13D,0x190(%RSP) |
(223) 0x43d992 JMP 43d468 |
0x43d997 INC %ECX |
0x43d999 XOR %EDX,%EDX |
0x43d99b JMP 43ced1 |
Path / |
Source file and lines | PdV.cpp:48-63 |
Module | exec |
nb instructions | 101 |
nb uops | 111 |
loop length | 407 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 2 |
used ymm registers | 2 |
used zmm registers | 2 |
nb stack references | 19 |
micro-operation queue | 18.50 cycles |
front end | 18.50 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 7.20 | 8.00 | 9.33 | 9.33 | 13.00 | 7.40 | 7.20 | 13.00 | 13.00 | 13.00 | 7.20 | 9.33 |
cycles | 7.20 | 11.93 | 9.33 | 9.33 | 13.00 | 7.40 | 7.20 | 13.00 | 13.00 | 13.00 | 7.20 | 9.33 |
Cycles executing div or sqrt instructions | 12.00 |
FE+BE cycles | 17.71-17.85 |
Stall cycles | 0.00 |
Front-end | 18.50 |
Dispatch | 13.00 |
DIV/SQRT | 12.00 |
Overall L1 | 18.50 |
all | 2% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 10% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 0% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 2% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 8% |
all | 9% |
load | 11% |
store | 10% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 8% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 12% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
all | 10% |
load | 11% |
store | 10% |
mul | 12% |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 9% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RDI,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x40,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0x1c0,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x68(%RDI),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x6c(%RDI),%EDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x60(%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x64(%RBX),%ECX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD $0x2,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA 0x1(%RAX),%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
LEA 0x1(%RSI),%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %EDX,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %EDI,0x7c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %EDX,%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 43d96b <_Z10PdV_kernelbiiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_._omp_fn.0+0xb2b> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %EDX,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0x2(%RCX),%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB %R15D,%R13D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %R14D,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 43d96b <_Z10PdV_kernelbiiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_._omp_fn.0+0xb2b> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R14D,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %EDI,%R8D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R8D,0x84(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 4046c0 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %EAX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 4045b0 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %EAX,%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x84(%RSP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
IMUL %R13D,%EAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
DIV %R12D | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
MOV %EAX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %EDX,%R9D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB 43d997 <_Z10PdV_kernelbiiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_._omp_fn.0+0xb57> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %ECX,%R9D | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%R9,%RDX,1),%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
LEA (%RCX,%RAX,1),%R10D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %EAX,0x1b8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R10D,0x78(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %R10D,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 43d96b <_Z10PdV_kernelbiiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_._omp_fn.0+0xb2b> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x7c(%RSP),%R11D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x266cf(%RIP),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
DIVL 0x84(%RSP) | 5 | 0 | 3 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 11-16 | 6 |
MOV 0x8(%RBX),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RBX),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x58(%RBX),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RBX),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x40(%RBX),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%RBX),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RDI,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x266ab(%RIP),%XMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R13,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R8,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R12,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R9,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R10,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMULSD (%RBX),%XMM2,%XMM2 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VBROADCASTSD %XMM3,%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM3,%ZMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM2,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM2,%ZMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%RDX,%R11,1),%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
LEA (%RAX,%R15,1),%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %R14D,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x48(%RBX),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x50(%RBX),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x30(%RBX),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %ESI,0x148(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB %ESI,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x38(%RBX),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x28(%RBX),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVSXD %R15D,%R15 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV %RDX,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R14,0x68(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R11,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RSI,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RBX,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R15,0x140(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
INC %ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 43ced1 <_Z10PdV_kernelbiiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_._omp_fn.0+0x91> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
Source file and lines | PdV.cpp:48-63 |
Module | exec |
nb instructions | 101 |
nb uops | 111 |
loop length | 407 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 2 |
used ymm registers | 2 |
used zmm registers | 2 |
nb stack references | 19 |
micro-operation queue | 18.50 cycles |
front end | 18.50 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 7.20 | 8.00 | 9.33 | 9.33 | 13.00 | 7.40 | 7.20 | 13.00 | 13.00 | 13.00 | 7.20 | 9.33 |
cycles | 7.20 | 11.93 | 9.33 | 9.33 | 13.00 | 7.40 | 7.20 | 13.00 | 13.00 | 13.00 | 7.20 | 9.33 |
Cycles executing div or sqrt instructions | 12.00 |
FE+BE cycles | 17.71-17.85 |
Stall cycles | 0.00 |
Front-end | 18.50 |
Dispatch | 13.00 |
DIV/SQRT | 12.00 |
Overall L1 | 18.50 |
all | 2% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 10% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 0% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 2% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 8% |
all | 9% |
load | 11% |
store | 10% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 8% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 12% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
all | 10% |
load | 11% |
store | 10% |
mul | 12% |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 9% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RDI,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x40,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0x1c0,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x68(%RDI),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x6c(%RDI),%EDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x60(%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x64(%RBX),%ECX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD $0x2,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA 0x1(%RAX),%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
LEA 0x1(%RSI),%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %EDX,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %EDI,0x7c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %EDX,%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 43d96b <_Z10PdV_kernelbiiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_._omp_fn.0+0xb2b> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %EDX,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0x2(%RCX),%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB %R15D,%R13D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %R14D,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 43d96b <_Z10PdV_kernelbiiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_._omp_fn.0+0xb2b> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R14D,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %EDI,%R8D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R8D,0x84(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 4046c0 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %EAX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 4045b0 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %EAX,%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x84(%RSP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
IMUL %R13D,%EAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
DIV %R12D | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
MOV %EAX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %EDX,%R9D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB 43d997 <_Z10PdV_kernelbiiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_._omp_fn.0+0xb57> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %ECX,%R9D | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%R9,%RDX,1),%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
LEA (%RCX,%RAX,1),%R10D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %EAX,0x1b8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R10D,0x78(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %R10D,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 43d96b <_Z10PdV_kernelbiiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_._omp_fn.0+0xb2b> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x7c(%RSP),%R11D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x266cf(%RIP),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
DIVL 0x84(%RSP) | 5 | 0 | 3 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 11-16 | 6 |
MOV 0x8(%RBX),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RBX),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x58(%RBX),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RBX),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x40(%RBX),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%RBX),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RDI,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x266ab(%RIP),%XMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R13,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R8,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R12,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R9,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R10,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMULSD (%RBX),%XMM2,%XMM2 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VBROADCASTSD %XMM3,%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM3,%ZMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM2,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM2,%ZMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%RDX,%R11,1),%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
LEA (%RAX,%R15,1),%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %R14D,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x48(%RBX),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x50(%RBX),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x30(%RBX),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %ESI,0x148(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB %ESI,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x38(%RBX),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x28(%RBX),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVSXD %R15D,%R15 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV %RDX,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R14,0x68(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R11,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RSI,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RBX,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R15,0x140(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
INC %ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 43ced1 <_Z10PdV_kernelbiiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_._omp_fn.0+0x91> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼_Z10PdV_kernelbiiiidRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_._omp_fn.0– | 5.42 | 4.05 |
▼Loop 223 - PdV.cpp:51-63 - exec– | 0 | 0 |
○Loop 225 - PdV.cpp:51-63 - exec | 5.41 | 4.04 |
○Loop 224 - PdV.cpp:55-63 - exec | 0 | 0 |