Function: _Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buf ... | Module: exec | Source: advec_mom.cpp:71-75 [...] | Coverage: 2.64% |
---|
Function: _Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buf ... | Module: exec | Source: advec_mom.cpp:71-75 [...] | Coverage: 2.64% |
---|
/home/eoseret/qaas_runs_CPU_9468/171-145-9336/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/advec_mom.cpp: 71 - 75 |
-------------------------------------------------------------------------------- |
71: #pragma omp parallel for simd collapse(2) |
72: for (int j = (y_min - 2 + 1); j < (y_max + 2 + 2); j++) { |
73: for (int i = (x_min - 2 + 1); i < (x_max + 2 + 2); i++) { |
74: post_vol(i, j) = volume(i, j); |
75: pre_vol(i, j) = post_vol(i, j) + vol_flux_x(i + 1, j + 0) - vol_flux_x(i, j); |
/home/eoseret/qaas_runs_CPU_9468/171-145-9336/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/context.h: 69 - 69 |
-------------------------------------------------------------------------------- |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
0x42a250 PUSH %RBP |
0x42a251 MOV %RSP,%RBP |
0x42a254 PUSH %R15 |
0x42a256 PUSH %R14 |
0x42a258 PUSH %R13 |
0x42a25a PUSH %R12 |
0x42a25c PUSH %RBX |
0x42a25d AND $-0x40,%RSP |
0x42a261 ADD $-0x80,%RSP |
0x42a265 MOV 0x28(%RDI),%EAX |
0x42a268 MOV 0x2c(%RDI),%EDX |
0x42a26b MOV 0x20(%RDI),%EBX |
0x42a26e MOV 0x24(%RDI),%ECX |
0x42a271 ADD $0x4,%EDX |
0x42a274 LEA -0x1(%RAX),%R15D |
0x42a278 LEA -0x1(%RBX),%ESI |
0x42a27b MOV %EDX,0x48(%RSP) |
0x42a27f MOV %ESI,0x44(%RSP) |
0x42a283 CMP %EDX,%R15D |
0x42a286 JGE 42a833 |
0x42a28c MOV %EDX,%EBX |
0x42a28e LEA 0x4(%RCX),%R14D |
0x42a292 SUB %R15D,%EBX |
0x42a295 CMP %R14D,%ESI |
0x42a298 JGE 42a833 |
0x42a29e MOV %RDI,%R13 |
0x42a2a1 MOV %R14D,%EDI |
0x42a2a4 SUB %ESI,%EDI |
0x42a2a6 MOV %EDI,0x4c(%RSP) |
0x42a2aa CALL 4046c0 <omp_get_num_threads@plt> |
0x42a2af MOV %EAX,%R12D |
0x42a2b2 CALL 4045b0 <omp_get_thread_num@plt> |
0x42a2b7 XOR %EDX,%EDX |
0x42a2b9 MOV %EAX,%R8D |
0x42a2bc MOV 0x4c(%RSP),%EAX |
0x42a2c0 IMUL %EBX,%EAX |
0x42a2c3 DIV %R12D |
0x42a2c6 MOV %EAX,%EDI |
0x42a2c8 CMP %EDX,%R8D |
0x42a2cb JB 42a854 |
0x42a2d1 IMUL %EDI,%R8D |
0x42a2d5 LEA (%R8,%RDX,1),%R11D |
0x42a2d9 LEA (%RDI,%R11,1),%R9D |
0x42a2dd MOV %R9D,0x40(%RSP) |
0x42a2e2 CMP %R9D,%R11D |
0x42a2e5 JAE 42a833 |
0x42a2eb MOV %R11D,%EAX |
0x42a2ee XOR %EDX,%EDX |
0x42a2f0 MOV 0x44(%RSP),%R10D |
0x42a2f5 MOV 0x18(%R13),%RSI |
0x42a2f9 DIVL 0x4c(%RSP) |
0x42a2fd MOV 0x10(%R13),%RBX |
0x42a301 MOV %RSI,0x30(%RSP) |
0x42a306 MOV %RBX,0x20(%RSP) |
0x42a30b ADD %EDX,%R10D |
0x42a30e ADD %R15D,%EAX |
0x42a311 MOV %R14D,%EDX |
0x42a314 MOV 0x8(%R13),%R15 |
0x42a318 MOV (%R13),%R14 |
0x42a31c MOV %R10D,0x74(%RSP) |
0x42a321 SUB %R10D,%EDX |
0x42a324 MOVSXD %EAX,%R12 |
0x42a327 MOV %R15,0x38(%RSP) |
0x42a32c MOV %R14,0x28(%RSP) |
0x42a331 NOPL (%RAX) |
(130) 0x42a338 CMP %EDX,%EDI |
(130) 0x42a33a CMOVBE %EDI,%EDX |
(130) 0x42a33d LEA (%R11,%RDX,1),%ECX |
(130) 0x42a341 MOV %ECX,0x70(%RSP) |
(130) 0x42a345 CMP %ECX,%R11D |
(130) 0x42a348 JAE 42a806 |
(130) 0x42a34e MOV 0x38(%RSP),%R13 |
(130) 0x42a353 MOV 0x30(%RSP),%RDI |
(130) 0x42a358 MOV 0x20(%RSP),%R10 |
(130) 0x42a35d MOV 0x28(%RSP),%RAX |
(130) 0x42a362 MOV (%R13),%R8 |
(130) 0x42a366 MOV (%RDI),%R9 |
(130) 0x42a369 MOV 0x10(%RDI),%R14 |
(130) 0x42a36d MOV (%R10),%RDI |
(130) 0x42a370 IMUL %R12,%R8 |
(130) 0x42a374 MOV 0x10(%R13),%R15 |
(130) 0x42a378 MOV 0x10(%R10),%RSI |
(130) 0x42a37c IMUL %R12,%R9 |
(130) 0x42a380 MOV (%RAX),%R13 |
(130) 0x42a383 MOV 0x10(%RAX),%RBX |
(130) 0x42a387 LEA -0x1(%RDX),%EAX |
(130) 0x42a38a IMUL %R12,%RDI |
(130) 0x42a38e MOV %RSI,0x78(%RSP) |
(130) 0x42a393 MOV %R8,0x58(%RSP) |
(130) 0x42a398 IMUL %R12,%R13 |
(130) 0x42a39c MOV %R9,0x60(%RSP) |
(130) 0x42a3a1 MOV %RDI,0x68(%RSP) |
(130) 0x42a3a6 CMP $0x6,%EAX |
(130) 0x42a3a9 JBE 42a848 |
(130) 0x42a3af MOVSXD 0x74(%RSP),%RAX |
(130) 0x42a3b4 MOV 0x68(%RSP),%RSI |
(130) 0x42a3b9 LEA (%R9,%RAX,1),%RCX |
(130) 0x42a3bd LEA (%R8,%RAX,1),%R8 |
(130) 0x42a3c1 LEA (%R14,%RCX,8),%R9 |
(130) 0x42a3c5 MOV 0x78(%RSP),%RCX |
(130) 0x42a3ca LEA 0x1(%R13,%RAX,1),%RDI |
(130) 0x42a3cf ADD %RSI,%RAX |
(130) 0x42a3d2 SAL $0x3,%RDI |
(130) 0x42a3d6 LEA (%R15,%R8,8),%R10 |
(130) 0x42a3da LEA (%RCX,%RAX,8),%RSI |
(130) 0x42a3de MOV %EDX,%ECX |
(130) 0x42a3e0 LEA (%RBX,%RDI,1),%R8 |
(130) 0x42a3e4 XOR %EAX,%EAX |
(130) 0x42a3e6 SHR $0x3,%ECX |
(130) 0x42a3e9 LEA -0x8(%RBX,%RDI,1),%RDI |
(130) 0x42a3ee SAL $0x6,%RCX |
(130) 0x42a3f2 MOV %RCX,0x50(%RSP) |
(130) 0x42a3f7 SUB $0x40,%RCX |
(130) 0x42a3fb SHR $0x6,%RCX |
(130) 0x42a3ff INC %RCX |
(130) 0x42a402 AND $0x7,%ECX |
(130) 0x42a405 JE 42a553 |
(130) 0x42a40b CMP $0x1,%RCX |
(130) 0x42a40f JE 42a521 |
(130) 0x42a415 CMP $0x2,%RCX |
(130) 0x42a419 JE 42a4fa |
(130) 0x42a41f CMP $0x3,%RCX |
(130) 0x42a423 JE 42a4d3 |
(130) 0x42a429 CMP $0x4,%RCX |
(130) 0x42a42d JE 42a4ac |
(130) 0x42a42f CMP $0x5,%RCX |
(130) 0x42a433 JE 42a485 |
(130) 0x42a435 CMP $0x6,%RCX |
(130) 0x42a439 JE 42a45e |
(130) 0x42a43b VMOVUPD (%R10),%ZMM0 |
(130) 0x42a441 MOV $0x40,%EAX |
(130) 0x42a446 VMOVUPD %ZMM0,(%R9) |
(130) 0x42a44c VADDPD (%R8),%ZMM0,%ZMM1 |
(130) 0x42a452 VSUBPD (%RDI),%ZMM1,%ZMM2 |
(130) 0x42a458 VMOVUPD %ZMM2,(%RSI) |
(130) 0x42a45e VMOVUPD (%R10,%RAX,1),%ZMM3 |
(130) 0x42a465 VMOVUPD %ZMM3,(%R9,%RAX,1) |
(130) 0x42a46c VADDPD (%R8,%RAX,1),%ZMM3,%ZMM4 |
(130) 0x42a473 VSUBPD (%RDI,%RAX,1),%ZMM4,%ZMM5 |
(130) 0x42a47a VMOVUPD %ZMM5,(%RSI,%RAX,1) |
(130) 0x42a481 ADD $0x40,%RAX |
(130) 0x42a485 VMOVUPD (%R10,%RAX,1),%ZMM6 |
(130) 0x42a48c VMOVUPD %ZMM6,(%R9,%RAX,1) |
(130) 0x42a493 VADDPD (%R8,%RAX,1),%ZMM6,%ZMM7 |
(130) 0x42a49a VSUBPD (%RDI,%RAX,1),%ZMM7,%ZMM8 |
(130) 0x42a4a1 VMOVUPD %ZMM8,(%RSI,%RAX,1) |
(130) 0x42a4a8 ADD $0x40,%RAX |
(130) 0x42a4ac VMOVUPD (%R10,%RAX,1),%ZMM9 |
(130) 0x42a4b3 VMOVUPD %ZMM9,(%R9,%RAX,1) |
(130) 0x42a4ba VADDPD (%R8,%RAX,1),%ZMM9,%ZMM10 |
(130) 0x42a4c1 VSUBPD (%RDI,%RAX,1),%ZMM10,%ZMM11 |
(130) 0x42a4c8 VMOVUPD %ZMM11,(%RSI,%RAX,1) |
(130) 0x42a4cf ADD $0x40,%RAX |
(130) 0x42a4d3 VMOVUPD (%R10,%RAX,1),%ZMM12 |
(130) 0x42a4da VMOVUPD %ZMM12,(%R9,%RAX,1) |
(130) 0x42a4e1 VADDPD (%R8,%RAX,1),%ZMM12,%ZMM13 |
(130) 0x42a4e8 VSUBPD (%RDI,%RAX,1),%ZMM13,%ZMM14 |
(130) 0x42a4ef VMOVUPD %ZMM14,(%RSI,%RAX,1) |
(130) 0x42a4f6 ADD $0x40,%RAX |
(130) 0x42a4fa VMOVUPD (%R10,%RAX,1),%ZMM15 |
(130) 0x42a501 VMOVUPD %ZMM15,(%R9,%RAX,1) |
(130) 0x42a508 VADDPD (%R8,%RAX,1),%ZMM15,%ZMM0 |
(130) 0x42a50f VSUBPD (%RDI,%RAX,1),%ZMM0,%ZMM1 |
(130) 0x42a516 VMOVUPD %ZMM1,(%RSI,%RAX,1) |
(130) 0x42a51d ADD $0x40,%RAX |
(130) 0x42a521 VMOVUPD (%R10,%RAX,1),%ZMM2 |
(130) 0x42a528 VMOVUPD %ZMM2,(%R9,%RAX,1) |
(130) 0x42a52f VADDPD (%R8,%RAX,1),%ZMM2,%ZMM3 |
(130) 0x42a536 VSUBPD (%RDI,%RAX,1),%ZMM3,%ZMM4 |
(130) 0x42a53d VMOVUPD %ZMM4,(%RSI,%RAX,1) |
(130) 0x42a544 ADD $0x40,%RAX |
(130) 0x42a548 CMP %RAX,0x50(%RSP) |
(130) 0x42a54d JE 42a69f |
(131) 0x42a553 VMOVUPD (%R10,%RAX,1),%ZMM5 |
(131) 0x42a55a VMOVUPD %ZMM5,(%R9,%RAX,1) |
(131) 0x42a561 VADDPD (%R8,%RAX,1),%ZMM5,%ZMM6 |
(131) 0x42a568 VSUBPD (%RDI,%RAX,1),%ZMM6,%ZMM7 |
(131) 0x42a56f VMOVUPD %ZMM7,(%RSI,%RAX,1) |
(131) 0x42a576 VMOVUPD 0x40(%R10,%RAX,1),%ZMM8 |
(131) 0x42a57e VMOVUPD %ZMM8,0x40(%R9,%RAX,1) |
(131) 0x42a586 VADDPD 0x40(%R8,%RAX,1),%ZMM8,%ZMM9 |
(131) 0x42a58e VSUBPD 0x40(%RDI,%RAX,1),%ZMM9,%ZMM10 |
(131) 0x42a596 VMOVUPD %ZMM10,0x40(%RSI,%RAX,1) |
(131) 0x42a59e VMOVUPD 0x80(%R10,%RAX,1),%ZMM11 |
(131) 0x42a5a6 VMOVUPD %ZMM11,0x80(%R9,%RAX,1) |
(131) 0x42a5ae VADDPD 0x80(%R8,%RAX,1),%ZMM11,%ZMM12 |
(131) 0x42a5b6 VSUBPD 0x80(%RDI,%RAX,1),%ZMM12,%ZMM13 |
(131) 0x42a5be VMOVUPD %ZMM13,0x80(%RSI,%RAX,1) |
(131) 0x42a5c6 VMOVUPD 0xc0(%R10,%RAX,1),%ZMM14 |
(131) 0x42a5ce VMOVUPD %ZMM14,0xc0(%R9,%RAX,1) |
(131) 0x42a5d6 VADDPD 0xc0(%R8,%RAX,1),%ZMM14,%ZMM15 |
(131) 0x42a5de VSUBPD 0xc0(%RDI,%RAX,1),%ZMM15,%ZMM0 |
(131) 0x42a5e6 VMOVUPD %ZMM0,0xc0(%RSI,%RAX,1) |
(131) 0x42a5ee VMOVUPD 0x100(%R10,%RAX,1),%ZMM1 |
(131) 0x42a5f6 VMOVUPD %ZMM1,0x100(%R9,%RAX,1) |
(131) 0x42a5fe VADDPD 0x100(%R8,%RAX,1),%ZMM1,%ZMM2 |
(131) 0x42a606 VSUBPD 0x100(%RDI,%RAX,1),%ZMM2,%ZMM3 |
(131) 0x42a60e VMOVUPD %ZMM3,0x100(%RSI,%RAX,1) |
(131) 0x42a616 VMOVUPD 0x140(%R10,%RAX,1),%ZMM4 |
(131) 0x42a61e VMOVUPD %ZMM4,0x140(%R9,%RAX,1) |
(131) 0x42a626 VADDPD 0x140(%R8,%RAX,1),%ZMM4,%ZMM5 |
(131) 0x42a62e VSUBPD 0x140(%RDI,%RAX,1),%ZMM5,%ZMM6 |
(131) 0x42a636 VMOVUPD %ZMM6,0x140(%RSI,%RAX,1) |
(131) 0x42a63e VMOVUPD 0x180(%R10,%RAX,1),%ZMM7 |
(131) 0x42a646 VMOVUPD %ZMM7,0x180(%R9,%RAX,1) |
(131) 0x42a64e VADDPD 0x180(%R8,%RAX,1),%ZMM7,%ZMM8 |
(131) 0x42a656 VSUBPD 0x180(%RDI,%RAX,1),%ZMM8,%ZMM9 |
(131) 0x42a65e VMOVUPD %ZMM9,0x180(%RSI,%RAX,1) |
(131) 0x42a666 VMOVUPD 0x1c0(%R10,%RAX,1),%ZMM10 |
(131) 0x42a66e VMOVUPD %ZMM10,0x1c0(%R9,%RAX,1) |
(131) 0x42a676 VADDPD 0x1c0(%R8,%RAX,1),%ZMM10,%ZMM11 |
(131) 0x42a67e VSUBPD 0x1c0(%RDI,%RAX,1),%ZMM11,%ZMM12 |
(131) 0x42a686 VMOVUPD %ZMM12,0x1c0(%RSI,%RAX,1) |
(131) 0x42a68e ADD $0x200,%RAX |
(131) 0x42a694 CMP %RAX,0x50(%RSP) |
(131) 0x42a699 JNE 42a553 |
(130) 0x42a69f MOV 0x74(%RSP),%R10D |
(130) 0x42a6a4 MOV %EDX,%R9D |
(130) 0x42a6a7 AND $-0x8,%R9D |
(130) 0x42a6ab ADD %R9D,%R11D |
(130) 0x42a6ae LEA (%R9,%R10,1),%ESI |
(130) 0x42a6b2 TEST $0x7,%DL |
(130) 0x42a6b5 JE 42a801 |
(130) 0x42a6bb SUB %R9D,%EDX |
(130) 0x42a6be LEA -0x1(%RDX),%R8D |
(130) 0x42a6c2 CMP $0x2,%R8D |
(130) 0x42a6c6 JBE 42a730 |
(130) 0x42a6c8 MOVSXD 0x74(%RSP),%RAX |
(130) 0x42a6cd MOV 0x58(%RSP),%R10 |
(130) 0x42a6d2 MOV 0x60(%RSP),%R8 |
(130) 0x42a6d7 ADD %RAX,%R10 |
(130) 0x42a6da LEA (%R13,%RAX,1),%RDI |
(130) 0x42a6df ADD %R9,%R10 |
(130) 0x42a6e2 ADD %RAX,%R8 |
(130) 0x42a6e5 LEA 0x1(%R9,%RDI,1),%RCX |
(130) 0x42a6ea MOV 0x68(%RSP),%RDI |
(130) 0x42a6ef VMOVUPD (%R15,%R10,8),%YMM13 |
(130) 0x42a6f5 ADD %R9,%R8 |
(130) 0x42a6f8 ADD %RDI,%RAX |
(130) 0x42a6fb VMOVUPD %YMM13,(%R14,%R8,8) |
(130) 0x42a701 ADD %R9,%RAX |
(130) 0x42a704 MOV 0x78(%RSP),%R9 |
(130) 0x42a709 VMOVUPD (%RBX,%RCX,8),%YMM14 |
(130) 0x42a70e VSUBPD -0x8(%RBX,%RCX,8),%YMM14,%YMM15 |
(130) 0x42a714 VADDPD %YMM13,%YMM15,%YMM0 |
(130) 0x42a719 VMOVUPD %YMM0,(%R9,%RAX,8) |
(130) 0x42a71f TEST $0x3,%DL |
(130) 0x42a722 JE 42a801 |
(130) 0x42a728 AND $-0x4,%EDX |
(130) 0x42a72b ADD %EDX,%R11D |
(130) 0x42a72e ADD %EDX,%ESI |
(130) 0x42a730 MOV 0x58(%RSP),%R9 |
(130) 0x42a735 MOVSXD %ESI,%RDX |
(130) 0x42a738 MOV 0x60(%RSP),%R10 |
(130) 0x42a73d LEA (%R9,%RDX,1),%RAX |
(130) 0x42a741 LEA (%R10,%RDX,1),%RCX |
(130) 0x42a745 VMOVSD (%R15,%RAX,8),%XMM1 |
(130) 0x42a74b LEA 0x1(%RSI),%EAX |
(130) 0x42a74e CLTQ |
(130) 0x42a750 LEA (%R13,%RAX,1),%R8 |
(130) 0x42a755 VMOVSD %XMM1,(%R14,%RCX,8) |
(130) 0x42a75b LEA (%RBX,%R8,8),%RCX |
(130) 0x42a75f MOV 0x68(%RSP),%R8 |
(130) 0x42a764 VMOVSD (%RCX),%XMM2 |
(130) 0x42a768 LEA (%R8,%RDX,1),%RDI |
(130) 0x42a76c ADD %R13,%RDX |
(130) 0x42a76f VSUBSD (%RBX,%RDX,8),%XMM2,%XMM3 |
(130) 0x42a774 MOV 0x78(%RSP),%RDX |
(130) 0x42a779 VADDSD %XMM1,%XMM3,%XMM4 |
(130) 0x42a77d VMOVSD %XMM4,(%RDX,%RDI,8) |
(130) 0x42a782 MOV 0x70(%RSP),%EDI |
(130) 0x42a786 LEA 0x1(%R11),%EDX |
(130) 0x42a78a CMP %EDI,%EDX |
(130) 0x42a78c JAE 42a801 |
(130) 0x42a78e LEA (%RAX,%R9,1),%RDX |
(130) 0x42a792 LEA (%RAX,%R10,1),%RDI |
(130) 0x42a796 ADD %R8,%RAX |
(130) 0x42a799 ADD $0x2,%R11D |
(130) 0x42a79d VMOVSD (%R15,%RDX,8),%XMM5 |
(130) 0x42a7a3 LEA 0x2(%RSI),%EDX |
(130) 0x42a7a6 MOVSXD %EDX,%RDX |
(130) 0x42a7a9 VMOVSD %XMM5,(%R14,%RDI,8) |
(130) 0x42a7af LEA (%R13,%RDX,1),%RDI |
(130) 0x42a7b4 LEA (%RBX,%RDI,8),%RDI |
(130) 0x42a7b8 VADDSD (%RDI),%XMM5,%XMM6 |
(130) 0x42a7bc VSUBSD (%RCX),%XMM6,%XMM7 |
(130) 0x42a7c0 MOV 0x78(%RSP),%RCX |
(130) 0x42a7c5 VMOVSD %XMM7,(%RCX,%RAX,8) |
(130) 0x42a7ca MOV 0x70(%RSP),%EAX |
(130) 0x42a7ce CMP %EAX,%R11D |
(130) 0x42a7d1 JAE 42a801 |
(130) 0x42a7d3 ADD %RDX,%R9 |
(130) 0x42a7d6 ADD $0x3,%ESI |
(130) 0x42a7d9 ADD %RDX,%R10 |
(130) 0x42a7dc ADD %RDX,%R8 |
(130) 0x42a7df VMOVSD (%R15,%R9,8),%XMM8 |
(130) 0x42a7e5 MOVSXD %ESI,%R11 |
(130) 0x42a7e8 ADD %R13,%R11 |
(130) 0x42a7eb VMOVSD %XMM8,(%R14,%R10,8) |
(130) 0x42a7f1 VADDSD (%RBX,%R11,8),%XMM8,%XMM9 |
(130) 0x42a7f7 VSUBSD (%RDI),%XMM9,%XMM10 |
(130) 0x42a7fb VMOVSD %XMM10,(%RCX,%R8,8) |
(130) 0x42a801 MOV 0x70(%RSP),%R11D |
(130) 0x42a806 INC %R12 |
(130) 0x42a809 LEA (%R12),%R15D |
(130) 0x42a80d CMP %R15D,0x48(%RSP) |
(130) 0x42a812 JLE 42a830 |
(130) 0x42a814 MOV 0x40(%RSP),%EDI |
(130) 0x42a818 MOV 0x44(%RSP),%R14D |
(130) 0x42a81d MOV 0x4c(%RSP),%EDX |
(130) 0x42a821 MOV %R14D,0x74(%RSP) |
(130) 0x42a826 SUB %R11D,%EDI |
(130) 0x42a829 JMP 42a338 |
0x42a82e XCHG %AX,%AX |
0x42a830 VZEROUPPER |
0x42a833 LEA -0x28(%RBP),%RSP |
0x42a837 POP %RBX |
0x42a838 POP %R12 |
0x42a83a POP %R13 |
0x42a83c POP %R14 |
0x42a83e POP %R15 |
0x42a840 POP %RBP |
0x42a841 RET |
0x42a842 NOPW (%RAX,%RAX,1) |
(130) 0x42a848 MOV 0x74(%RSP),%ESI |
(130) 0x42a84c XOR %R9D,%R9D |
(130) 0x42a84f JMP 42a6bb |
0x42a854 INC %EDI |
0x42a856 XOR %EDX,%EDX |
0x42a858 JMP 42a2d1 |
0x42a85d NOPL (%RAX) |
Path / |
Source file and lines | advec_mom.cpp:71-75 |
Module | exec |
nb instructions | 80 |
nb uops | 90 |
loop length | 270 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 10 |
micro-operation queue | 15.00 cycles |
front end | 15.00 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 5.70 | 8.00 | 6.00 | 6.00 | 8.50 | 5.87 | 5.70 | 8.50 | 8.50 | 8.50 | 5.73 | 6.00 |
cycles | 5.70 | 11.73 | 6.00 | 6.00 | 8.50 | 5.87 | 5.70 | 8.50 | 8.50 | 8.50 | 5.73 | 6.00 |
Cycles executing div or sqrt instructions | 12.00 |
FE+BE cycles | 14.24-14.38 |
Stall cycles | 0.00 |
Front-end | 15.00 |
Dispatch | 11.73 |
DIV/SQRT | 12.00 |
Overall L1 | 15.00 |
all | 4% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 14% |
all | 9% |
load | 10% |
store | 9% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 9% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x40,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
ADD $-0x80,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x28(%RDI),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x2c(%RDI),%EDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%RDI),%EBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x24(%RDI),%ECX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD $0x4,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA -0x1(%RAX),%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
LEA -0x1(%RBX),%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %EDX,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %ESI,0x44(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %EDX,%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 42a833 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.3+0x5e3> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %EDX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0x4(%RCX),%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB %R15D,%EBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %R14D,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 42a833 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.3+0x5e3> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RDI,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R14D,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %ESI,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %EDI,0x4c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 4046c0 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %EAX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 4045b0 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %EAX,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x4c(%RSP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
IMUL %EBX,%EAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
DIV %R12D | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
MOV %EAX,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %EDX,%R8D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB 42a854 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.3+0x604> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %EDI,%R8D | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%R8,%RDX,1),%R11D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
LEA (%RDI,%R11,1),%R9D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %R9D,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %R9D,%R11D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 42a833 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.3+0x5e3> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R11D,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x44(%RSP),%R10D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%R13),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
DIVL 0x4c(%RSP) | 5 | 0 | 3 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 11-16 | 6 |
MOV 0x10(%R13),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RSI,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RBX,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD %EDX,%R10D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %R15D,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R14D,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x8(%R13),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R13),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R10D,0x74(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB %R10D,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOVSXD %EAX,%R12 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV %R15,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R14,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
INC %EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 42a2d1 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.3+0x81> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | advec_mom.cpp:71-75 |
Module | exec |
nb instructions | 80 |
nb uops | 90 |
loop length | 270 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 10 |
micro-operation queue | 15.00 cycles |
front end | 15.00 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 5.70 | 8.00 | 6.00 | 6.00 | 8.50 | 5.87 | 5.70 | 8.50 | 8.50 | 8.50 | 5.73 | 6.00 |
cycles | 5.70 | 11.73 | 6.00 | 6.00 | 8.50 | 5.87 | 5.70 | 8.50 | 8.50 | 8.50 | 5.73 | 6.00 |
Cycles executing div or sqrt instructions | 12.00 |
FE+BE cycles | 14.24-14.38 |
Stall cycles | 0.00 |
Front-end | 15.00 |
Dispatch | 11.73 |
DIV/SQRT | 12.00 |
Overall L1 | 15.00 |
all | 4% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 14% |
all | 9% |
load | 10% |
store | 9% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 9% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x40,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
ADD $-0x80,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x28(%RDI),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x2c(%RDI),%EDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%RDI),%EBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x24(%RDI),%ECX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD $0x4,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA -0x1(%RAX),%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
LEA -0x1(%RBX),%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %EDX,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %ESI,0x44(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %EDX,%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 42a833 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.3+0x5e3> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %EDX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0x4(%RCX),%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB %R15D,%EBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %R14D,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 42a833 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.3+0x5e3> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RDI,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R14D,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %ESI,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %EDI,0x4c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 4046c0 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %EAX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 4045b0 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %EAX,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x4c(%RSP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
IMUL %EBX,%EAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
DIV %R12D | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
MOV %EAX,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %EDX,%R8D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB 42a854 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.3+0x604> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %EDI,%R8D | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%R8,%RDX,1),%R11D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
LEA (%RDI,%R11,1),%R9D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %R9D,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %R9D,%R11D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 42a833 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.3+0x5e3> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R11D,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x44(%RSP),%R10D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%R13),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
DIVL 0x4c(%RSP) | 5 | 0 | 3 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 11-16 | 6 |
MOV 0x10(%R13),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RSI,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RBX,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD %EDX,%R10D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %R15D,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R14D,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x8(%R13),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R13),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R10D,0x74(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB %R10D,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOVSXD %EAX,%R12 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV %R15,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R14,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
INC %EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 42a2d1 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.3+0x81> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.3– | 2.64 | 1.97 |
▼Loop 130 - advec_mom.cpp:74-75 - exec– | 0.01 | 0.01 |
○Loop 131 - advec_mom.cpp:74-75 - exec | 2.63 | 1.97 |