Function: advec_mom_kernel(int, int, int, int, clover::Buffer2D<double>&, clover::Buffer2D<double>&, ... | Module: exec | Source: advec_mom.cpp:167-172 [...] | Coverage: 2.73% |
---|
Function: advec_mom_kernel(int, int, int, int, clover::Buffer2D<double>&, clover::Buffer2D<double>&, ... | Module: exec | Source: advec_mom.cpp:167-172 [...] | Coverage: 2.73% |
---|
/scratch_na/users/xoserete/qaas_runs/171-320-5323/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/advec_mom.cpp: 167 - 172 |
-------------------------------------------------------------------------------- |
167: #pragma omp parallel for simd collapse(2) |
168: for (int j = (y_min - 1 + 1); j < (y_max + 2 + 2); j++) { |
169: for (int i = (x_min + 1); i < (x_max + 1 + 2); i++) { |
170: node_mass_post(i, j) = 0.25 * (density1(i + 0, j - 1) * post_vol(i + 0, j - 1) + density1(i, j) * post_vol(i, j) + |
171: density1(i - 1, j - 1) * post_vol(i - 1, j - 1) + density1(i - 1, j + 0) * post_vol(i - 1, j + 0)); |
172: node_mass_pre(i, j) = node_mass_post(i, j) - node_flux(i + 0, j - 1) + node_flux(i, j); |
/scratch_na/users/xoserete/qaas_runs/171-320-5323/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/context.h: 69 - 69 |
-------------------------------------------------------------------------------- |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
0x41fac0 PUSH %RBP |
0x41fac1 MOV %RSP,%RBP |
0x41fac4 PUSH %R15 |
0x41fac6 PUSH %R14 |
0x41fac8 PUSH %R13 |
0x41faca PUSH %R12 |
0x41facc PUSH %RBX |
0x41facd AND $-0x40,%RSP |
0x41fad1 SUB $0x100,%RSP |
0x41fad8 MOV 0x34(%RDI),%EAX |
0x41fadb MOV 0x28(%RDI),%ESI |
0x41fade MOV 0x30(%RDI),%R12D |
0x41fae2 MOV 0x2c(%RDI),%EDX |
0x41fae5 ADD $0x4,%EAX |
0x41fae8 LEA 0x1(%RSI),%ECX |
0x41faeb MOV %EAX,0x68(%RSP) |
0x41faef MOV %ECX,0x64(%RSP) |
0x41faf3 CMP %EAX,%R12D |
0x41faf6 JGE 420383 |
0x41fafc MOV %EAX,%R14D |
0x41faff LEA 0x3(%RDX),%R15D |
0x41fb03 SUB %R12D,%R14D |
0x41fb06 CMP %R15D,%ECX |
0x41fb09 JGE 420383 |
0x41fb0f MOV %RDI,%RBX |
0x41fb12 MOV %R15D,%EDI |
0x41fb15 SUB %ECX,%EDI |
0x41fb17 MOV %EDI,0x6c(%RSP) |
0x41fb1b CALL 4046c0 <omp_get_num_threads@plt> |
0x41fb20 MOV %EAX,%R13D |
0x41fb23 CALL 4045b0 <omp_get_thread_num@plt> |
0x41fb28 XOR %EDX,%EDX |
0x41fb2a MOV %EAX,%R8D |
0x41fb2d MOV 0x6c(%RSP),%EAX |
0x41fb31 IMUL %R14D,%EAX |
0x41fb35 DIV %R13D |
0x41fb38 MOV %EAX,%ECX |
0x41fb3a CMP %EDX,%R8D |
0x41fb3d JB 4203a6 |
0x41fb43 IMUL %ECX,%R8D |
0x41fb47 LEA (%R8,%RDX,1),%R10D |
0x41fb4b LEA (%RCX,%R10,1),%R9D |
0x41fb4f MOV %R9D,0x60(%RSP) |
0x41fb54 CMP %R9D,%R10D |
0x41fb57 JAE 420383 |
0x41fb5d MOV %R10D,%EAX |
0x41fb60 XOR %EDX,%EDX |
0x41fb62 MOV 0x64(%RSP),%R11D |
0x41fb67 MOV (%RBX),%RSI |
0x41fb6a DIVL 0x6c(%RSP) |
0x41fb6e MOV 0x10(%RBX),%R14 |
0x41fb72 MOV 0x8(%RBX),%RDI |
0x41fb76 MOV %R10D,0xfc(%RSP) |
0x41fb7e VMOVSD 0x2c9f2(%RIP),%XMM3 |
0x41fb86 MOV %RSI,0x40(%RSP) |
0x41fb8b MOV %R14,0x30(%RSP) |
0x41fb90 MOV %RDI,0x28(%RSP) |
0x41fb95 MOV %R15D,%R8D |
0x41fb98 MOV 0x20(%RBX),%R15 |
0x41fb9c MOV 0x18(%RBX),%RBX |
0x41fba0 VBROADCASTSD %XMM3,%YMM4 |
0x41fba5 VBROADCASTSD %XMM3,%ZMM2 |
0x41fbab MOV %R15,0x38(%RSP) |
0x41fbb0 MOV %RBX,0x20(%RSP) |
0x41fbb5 ADD %R12D,%EAX |
0x41fbb8 ADD %EDX,%R11D |
0x41fbbb MOV %EAX,0xac(%RSP) |
0x41fbc2 CLTQ |
0x41fbc4 SUB %R11D,%R8D |
0x41fbc7 MOV %R11D,0xf8(%RSP) |
0x41fbcf MOV %RAX,0x88(%RSP) |
0x41fbd7 NOPW (%RAX,%RAX,1) |
(133) 0x41fbe0 CMP %R8D,%ECX |
(133) 0x41fbe3 MOV 0xfc(%RSP),%R12D |
(133) 0x41fbeb CMOVBE %ECX,%R8D |
(133) 0x41fbef LEA (%R12,%R8,1),%ECX |
(133) 0x41fbf3 MOV %ECX,0xa8(%RSP) |
(133) 0x41fbfa CMP %ECX,%R12D |
(133) 0x41fbfd JAE 42033d |
(133) 0x41fc03 MOV 0x40(%RSP),%R10 |
(133) 0x41fc08 MOV 0x30(%RSP),%R14 |
(133) 0x41fc0d MOV 0xac(%RSP),%EAX |
(133) 0x41fc14 MOV 0x38(%RSP),%R9 |
(133) 0x41fc19 MOV (%R10),%RCX |
(133) 0x41fc1c MOV 0x10(%R14),%RDX |
(133) 0x41fc20 MOV 0x10(%R10),%R13 |
(133) 0x41fc24 MOV 0x28(%RSP),%R10 |
(133) 0x41fc29 DEC %EAX |
(133) 0x41fc2b CLTQ |
(133) 0x41fc2d MOV (%R9),%R11 |
(133) 0x41fc30 MOV 0x10(%R9),%R15 |
(133) 0x41fc34 MOV %RDX,0xd8(%RSP) |
(133) 0x41fc3c MOV (%R10),%RDX |
(133) 0x41fc3f MOV %RAX,%RSI |
(133) 0x41fc42 MOV %RAX,%R9 |
(133) 0x41fc45 MOV 0x88(%RSP),%R12 |
(133) 0x41fc4d MOV (%R14),%RBX |
(133) 0x41fc50 IMUL %RCX,%RSI |
(133) 0x41fc54 MOV 0x10(%R10),%R14 |
(133) 0x41fc58 IMUL %RDX,%RAX |
(133) 0x41fc5c IMUL %R12,%RBX |
(133) 0x41fc60 MOV %R14,0xb8(%RSP) |
(133) 0x41fc68 IMUL %R11,%R9 |
(133) 0x41fc6c MOV %RSI,0x90(%RSP) |
(133) 0x41fc74 MOV %RAX,0xe0(%RSP) |
(133) 0x41fc7c MOV 0x20(%RSP),%RAX |
(133) 0x41fc81 IMUL %R12,%RCX |
(133) 0x41fc85 IMUL %R12,%R11 |
(133) 0x41fc89 MOV %RBX,0xb0(%RSP) |
(133) 0x41fc91 LEA -0x1(%R8),%EBX |
(133) 0x41fc95 IMUL %R12,%RDX |
(133) 0x41fc99 MOV 0x10(%RAX),%RDI |
(133) 0x41fc9d MOV %R9,0xc0(%RSP) |
(133) 0x41fca5 IMUL (%RAX),%R12 |
(133) 0x41fca9 MOV %RCX,0xc8(%RSP) |
(133) 0x41fcb1 MOV %R11,0xd0(%RSP) |
(133) 0x41fcb9 MOV %RDX,0xf0(%RSP) |
(133) 0x41fcc1 MOV %RDI,0xe8(%RSP) |
(133) 0x41fcc9 MOV %R12,0xa0(%RSP) |
(133) 0x41fcd1 CMP $0x6,%EBX |
(133) 0x41fcd4 JBE 420398 |
(133) 0x41fcda MOVSXD 0xf8(%RSP),%RAX |
(133) 0x41fce2 LEA (%RSI,%RAX,1),%R10 |
(133) 0x41fce6 LEA (%R9,%RAX,1),%RSI |
(133) 0x41fcea ADD %RAX,%RCX |
(133) 0x41fced SAL $0x3,%RSI |
(133) 0x41fcf1 SAL $0x3,%R10 |
(133) 0x41fcf5 LEA (%R11,%RAX,1),%RDI |
(133) 0x41fcf9 SAL $0x3,%RCX |
(133) 0x41fcfd LEA (%R15,%RSI,1),%R9 |
(133) 0x41fd01 LEA (%R13,%R10,1),%RBX |
(133) 0x41fd06 SAL $0x3,%RDI |
(133) 0x41fd0a LEA (%R13,%RCX,1),%R14 |
(133) 0x41fd0f MOV %R9,0x78(%RSP) |
(133) 0x41fd14 LEA -0x8(%R13,%R10,1),%R9 |
(133) 0x41fd19 LEA -0x8(%R13,%RCX,1),%R10 |
(133) 0x41fd1e MOV 0xb0(%RSP),%RCX |
(133) 0x41fd26 LEA -0x8(%R15,%RSI,1),%R11 |
(133) 0x41fd2b MOV 0xd8(%RSP),%RSI |
(133) 0x41fd33 MOV %R14,0x80(%RSP) |
(133) 0x41fd3b LEA (%R15,%RDI,1),%R14 |
(133) 0x41fd3f LEA (%RCX,%RAX,1),%RDX |
(133) 0x41fd43 MOV 0xe0(%RSP),%RCX |
(133) 0x41fd4b MOV %R11,0x70(%RSP) |
(133) 0x41fd50 LEA -0x8(%R15,%RDI,1),%R11 |
(133) 0x41fd55 LEA (%RSI,%RDX,8),%RDI |
(133) 0x41fd59 MOV 0xb8(%RSP),%RSI |
(133) 0x41fd61 LEA (%RCX,%RAX,1),%RDX |
(133) 0x41fd65 LEA (%RSI,%RDX,8),%RCX |
(133) 0x41fd69 MOV 0xf0(%RSP),%RDX |
(133) 0x41fd71 MOV 0xb8(%RSP),%RSI |
(133) 0x41fd79 LEA (%RDX,%RAX,1),%RDX |
(133) 0x41fd7d ADD %R12,%RAX |
(133) 0x41fd80 LEA (%RSI,%RDX,8),%RSI |
(133) 0x41fd84 MOV 0xe8(%RSP),%RDX |
(133) 0x41fd8c LEA (%RDX,%RAX,8),%RDX |
(133) 0x41fd90 MOV %R8D,%EAX |
(133) 0x41fd93 SHR $0x3,%EAX |
(133) 0x41fd96 MOV %RAX,%R12 |
(133) 0x41fd99 SAL $0x6,%RAX |
(133) 0x41fd9d MOV %RAX,0x98(%RSP) |
(133) 0x41fda5 XOR %EAX,%EAX |
(133) 0x41fda7 AND $0x1,%R12D |
(133) 0x41fdab JE 41fe34 |
(133) 0x41fdb1 MOV 0x80(%RSP),%RAX |
(133) 0x41fdb9 VMOVUPD (%R10),%ZMM5 |
(133) 0x41fdbf MOV 0x78(%RSP),%R12 |
(133) 0x41fdc4 VMOVUPD (%R9),%ZMM8 |
(133) 0x41fdca VMOVUPD (%RAX),%ZMM6 |
(133) 0x41fdd0 VMULPD (%R11),%ZMM5,%ZMM1 |
(133) 0x41fdd6 VMOVUPD (%R12),%ZMM7 |
(133) 0x41fddd MOV 0x70(%RSP),%RAX |
(133) 0x41fde2 VMULPD (%R14),%ZMM6,%ZMM0 |
(133) 0x41fde8 MOV 0x98(%RSP),%R12 |
(133) 0x41fdf0 VFMADD231PD (%RAX),%ZMM8,%ZMM1 |
(133) 0x41fdf6 MOV $0x40,%EAX |
(133) 0x41fdfb VFMADD231PD (%RBX),%ZMM7,%ZMM0 |
(133) 0x41fe01 VADDPD %ZMM1,%ZMM0,%ZMM9 |
(133) 0x41fe07 VMULPD %ZMM2,%ZMM9,%ZMM10 |
(133) 0x41fe0d VMOVUPD %ZMM10,(%RDI) |
(133) 0x41fe13 VMOVUPD (%RSI),%ZMM11 |
(133) 0x41fe19 VSUBPD (%RCX),%ZMM11,%ZMM12 |
(133) 0x41fe1f VADDPD %ZMM10,%ZMM12,%ZMM13 |
(133) 0x41fe25 VMOVUPD %ZMM13,(%RDX) |
(133) 0x41fe2b CMP %R12,%RAX |
(133) 0x41fe2e JE 41ff52 |
(133) 0x41fe34 MOV %R15,0x50(%RSP) |
(133) 0x41fe39 MOV 0x78(%RSP),%R12 |
(133) 0x41fe3e MOV %R8D,0x5c(%RSP) |
(133) 0x41fe43 MOV 0x80(%RSP),%R8 |
(133) 0x41fe4b MOV %R13,0x48(%RSP) |
(133) 0x41fe50 MOV 0x70(%RSP),%R13 |
(134) 0x41fe55 VMOVUPD (%R8,%RAX,1),%ZMM14 |
(134) 0x41fe5c VMOVUPD (%R10,%RAX,1),%ZMM0 |
(134) 0x41fe63 VMOVUPD (%R12,%RAX,1),%ZMM6 |
(134) 0x41fe6a VMOVUPD (%R9,%RAX,1),%ZMM5 |
(134) 0x41fe71 VMULPD (%R14,%RAX,1),%ZMM14,%ZMM15 |
(134) 0x41fe78 MOV 0x98(%RSP),%R15 |
(134) 0x41fe80 VMULPD (%R11,%RAX,1),%ZMM0,%ZMM7 |
(134) 0x41fe87 VFMADD231PD (%RBX,%RAX,1),%ZMM6,%ZMM15 |
(134) 0x41fe8e VFMADD231PD (%R13,%RAX,1),%ZMM5,%ZMM7 |
(134) 0x41fe96 VADDPD %ZMM7,%ZMM15,%ZMM1 |
(134) 0x41fe9c VMULPD %ZMM2,%ZMM1,%ZMM8 |
(134) 0x41fea2 VMOVUPD %ZMM8,(%RDI,%RAX,1) |
(134) 0x41fea9 VMOVUPD (%RSI,%RAX,1),%ZMM9 |
(134) 0x41feb0 VSUBPD (%RCX,%RAX,1),%ZMM9,%ZMM10 |
(134) 0x41feb7 VADDPD %ZMM8,%ZMM10,%ZMM11 |
(134) 0x41febd VMOVUPD %ZMM11,(%RDX,%RAX,1) |
(134) 0x41fec4 VMOVUPD 0x40(%R8,%RAX,1),%ZMM12 |
(134) 0x41fecc VMOVUPD 0x40(%R10,%RAX,1),%ZMM15 |
(134) 0x41fed4 VMOVUPD 0x40(%R12,%RAX,1),%ZMM14 |
(134) 0x41fedc VMOVUPD 0x40(%R9,%RAX,1),%ZMM0 |
(134) 0x41fee4 VMULPD 0x40(%R14,%RAX,1),%ZMM12,%ZMM13 |
(134) 0x41feec VMULPD 0x40(%R11,%RAX,1),%ZMM15,%ZMM6 |
(134) 0x41fef4 VFMADD231PD 0x40(%RBX,%RAX,1),%ZMM14,%ZMM13 |
(134) 0x41fefc VFMADD231PD 0x40(%R13,%RAX,1),%ZMM0,%ZMM6 |
(134) 0x41ff04 VADDPD %ZMM6,%ZMM13,%ZMM7 |
(134) 0x41ff0a VMULPD %ZMM2,%ZMM7,%ZMM8 |
(134) 0x41ff10 VMOVUPD %ZMM8,0x40(%RDI,%RAX,1) |
(134) 0x41ff18 VMOVUPD 0x40(%RSI,%RAX,1),%ZMM5 |
(134) 0x41ff20 VSUBPD 0x40(%RCX,%RAX,1),%ZMM5,%ZMM1 |
(134) 0x41ff28 VADDPD %ZMM8,%ZMM1,%ZMM9 |
(134) 0x41ff2e VMOVUPD %ZMM9,0x40(%RDX,%RAX,1) |
(134) 0x41ff36 SUB $-0x80,%RAX |
(134) 0x41ff3a CMP %R15,%RAX |
(134) 0x41ff3d JNE 41fe55 |
(133) 0x41ff43 MOV 0x5c(%RSP),%R8D |
(133) 0x41ff48 MOV 0x50(%RSP),%R15 |
(133) 0x41ff4d MOV 0x48(%RSP),%R13 |
(133) 0x41ff52 MOV 0xf8(%RSP),%EBX |
(133) 0x41ff59 MOV %R8D,%EDX |
(133) 0x41ff5c AND $-0x8,%EDX |
(133) 0x41ff5f ADD %EDX,0xfc(%RSP) |
(133) 0x41ff66 LEA (%RDX,%RBX,1),%ECX |
(133) 0x41ff69 TEST $0x7,%R8B |
(133) 0x41ff6d JE 42032d |
(133) 0x41ff73 MOV %R8D,%ESI |
(133) 0x41ff76 SUB %EDX,%ESI |
(133) 0x41ff78 LEA -0x1(%RSI),%R14D |
(133) 0x41ff7c CMP $0x2,%R14D |
(133) 0x41ff80 JBE 42008b |
(133) 0x41ff86 MOVSXD 0xf8(%RSP),%RAX |
(133) 0x41ff8e MOV 0x90(%RSP),%R9 |
(133) 0x41ff96 MOV 0xc8(%RSP),%R10 |
(133) 0x41ff9e MOV 0xd0(%RSP),%R11 |
(133) 0x41ffa6 LEA (%R9,%RAX,1),%R8 |
(133) 0x41ffaa MOV 0xc0(%RSP),%RDI |
(133) 0x41ffb2 MOV 0xe0(%RSP),%RBX |
(133) 0x41ffba LEA (%R10,%RAX,1),%R9 |
(133) 0x41ffbe LEA (%R11,%RAX,1),%R10 |
(133) 0x41ffc2 ADD %RDX,%R8 |
(133) 0x41ffc5 MOV 0xb0(%RSP),%R12 |
(133) 0x41ffcd ADD %RDX,%R9 |
(133) 0x41ffd0 ADD %RDX,%R10 |
(133) 0x41ffd3 VMOVUPD (%R13,%R8,8),%YMM12 |
(133) 0x41ffda VMOVUPD -0x8(%R13,%R8,8),%YMM15 |
(133) 0x41ffe1 VMOVUPD (%R13,%R9,8),%YMM10 |
(133) 0x41ffe8 VMOVUPD -0x8(%R15,%R10,8),%YMM13 |
(133) 0x41ffef ADD %RAX,%RDI |
(133) 0x41fff2 LEA (%RBX,%RAX,1),%R11 |
(133) 0x41fff6 ADD %RDX,%RDI |
(133) 0x41fff9 MOV 0xf0(%RSP),%RBX |
(133) 0x420001 MOV 0xa0(%RSP),%R14 |
(133) 0x420009 LEA (%R12,%RAX,1),%R12 |
(133) 0x42000d VMULPD (%R15,%R10,8),%YMM10,%YMM11 |
(133) 0x420013 ADD %RDX,%R12 |
(133) 0x420016 ADD %RDX,%R11 |
(133) 0x420019 MOV 0xb8(%RSP),%R8 |
(133) 0x420021 VMULPD -0x8(%R13,%R9,8),%YMM13,%YMM14 |
(133) 0x420028 ADD %RAX,%RBX |
(133) 0x42002b ADD %R14,%RAX |
(133) 0x42002e ADD %RDX,%RBX |
(133) 0x420031 ADD %RDX,%RAX |
(133) 0x420034 MOV 0xd8(%RSP),%RDX |
(133) 0x42003c VFMADD231PD (%R15,%RDI,8),%YMM12,%YMM11 |
(133) 0x420042 VFMADD231PD -0x8(%R15,%RDI,8),%YMM15,%YMM14 |
(133) 0x420049 MOV 0xe8(%RSP),%RDI |
(133) 0x420051 VADDPD %YMM14,%YMM11,%YMM6 |
(133) 0x420056 VMULPD %YMM4,%YMM6,%YMM0 |
(133) 0x42005a VMOVUPD %YMM0,(%RDX,%R12,8) |
(133) 0x420060 VMOVUPD (%R8,%RBX,8),%YMM7 |
(133) 0x420066 VSUBPD (%R8,%R11,8),%YMM7,%YMM8 |
(133) 0x42006c VADDPD %YMM0,%YMM8,%YMM5 |
(133) 0x420070 VMOVUPD %YMM5,(%RDI,%RAX,8) |
(133) 0x420075 TEST $0x3,%SIL |
(133) 0x420079 JE 42032d |
(133) 0x42007f AND $-0x4,%ESI |
(133) 0x420082 ADD %ESI,0xfc(%RSP) |
(133) 0x420089 ADD %ESI,%ECX |
(133) 0x42008b MOV 0xc0(%RSP),%R10 |
(133) 0x420093 MOV 0xd0(%RSP),%RBX |
(133) 0x42009b MOVSXD %ECX,%RAX |
(133) 0x42009e LEA -0x1(%RCX),%EDX |
(133) 0x4200a1 MOVSXD %EDX,%RDX |
(133) 0x4200a4 MOV 0xc8(%RSP),%R14 |
(133) 0x4200ac MOV 0x90(%RSP),%R12 |
(133) 0x4200b4 LEA (%R10,%RAX,1),%R9 |
(133) 0x4200b8 ADD %RAX,%RBX |
(133) 0x4200bb LEA (%R15,%R9,8),%RDI |
(133) 0x4200bf LEA (%R15,%RBX,8),%R9 |
(133) 0x4200c3 MOV 0xd0(%RSP),%RBX |
(133) 0x4200cb LEA (%R14,%RAX,1),%R11 |
(133) 0x4200cf VMOVSD (%R9),%XMM1 |
(133) 0x4200d4 VMOVSD (%RDI),%XMM10 |
(133) 0x4200d8 LEA (%R12,%RAX,1),%RSI |
(133) 0x4200dc ADD %RDX,%RBX |
(133) 0x4200df LEA (%R13,%R11,8),%R8 |
(133) 0x4200e4 LEA (%RDX,%R10,1),%R11 |
(133) 0x4200e8 VMOVSD (%R15,%RBX,8),%XMM11 |
(133) 0x4200ee LEA (%RDX,%R12,1),%R10 |
(133) 0x4200f2 ADD %R14,%RDX |
(133) 0x4200f5 VMOVSD (%R15,%R11,8),%XMM13 |
(133) 0x4200fb VMULSD (%R8),%XMM1,%XMM9 |
(133) 0x420100 LEA (%R13,%RSI,8),%RSI |
(133) 0x420105 MOV 0xb0(%RSP),%R14 |
(133) 0x42010d VMULSD (%R13,%RDX,8),%XMM11,%XMM12 |
(133) 0x420114 MOV 0xd8(%RSP),%R11 |
(133) 0x42011c LEA (%R14,%RAX,1),%RDX |
(133) 0x420120 MOV 0xe0(%RSP),%R14 |
(133) 0x420128 VFMADD231SD (%RSI),%XMM10,%XMM9 |
(133) 0x42012d VFMADD231SD (%R13,%R10,8),%XMM13,%XMM12 |
(133) 0x420134 MOV 0xa0(%RSP),%R10 |
(133) 0x42013c LEA (%R10,%RAX,1),%RBX |
(133) 0x420140 MOV 0xa8(%RSP),%R10D |
(133) 0x420148 VADDSD %XMM12,%XMM9,%XMM14 |
(133) 0x42014d VMULSD %XMM3,%XMM14,%XMM15 |
(133) 0x420151 VMOVSD %XMM15,(%R11,%RDX,8) |
(133) 0x420157 MOV 0xf0(%RSP),%RDX |
(133) 0x42015f MOV 0xfc(%RSP),%R11D |
(133) 0x420167 ADD %RAX,%RDX |
(133) 0x42016a ADD %R14,%RAX |
(133) 0x42016d MOV 0xb8(%RSP),%R14 |
(133) 0x420175 INC %R11D |
(133) 0x420178 VMOVSD (%R14,%RDX,8),%XMM6 |
(133) 0x42017e VSUBSD (%R14,%RAX,8),%XMM6,%XMM0 |
(133) 0x420184 MOV 0xe8(%RSP),%RAX |
(133) 0x42018c VADDSD %XMM15,%XMM0,%XMM7 |
(133) 0x420191 VMOVSD %XMM7,(%RAX,%RBX,8) |
(133) 0x420196 LEA 0x1(%RCX),%EAX |
(133) 0x420199 CMP %R10D,%R11D |
(133) 0x42019c JAE 42032d |
(133) 0x4201a2 MOV 0xc0(%RSP),%RDX |
(133) 0x4201aa CLTQ |
(133) 0x4201ac VMOVSD (%R9),%XMM9 |
(133) 0x4201b1 ADD $0x2,%ECX |
(133) 0x4201b4 LEA (%R12,%RAX,1),%RBX |
(133) 0x4201b8 VMOVSD (%RDI),%XMM11 |
(133) 0x4201bc MOV 0xb0(%RSP),%R9 |
(133) 0x4201c4 LEA (%RDX,%RAX,1),%R11 |
(133) 0x4201c8 LEA (%R13,%RBX,8),%R10 |
(133) 0x4201cd MOV 0xc8(%RSP),%RBX |
(133) 0x4201d5 VMULSD (%R8),%XMM9,%XMM10 |
(133) 0x4201da LEA (%R15,%R11,8),%RDX |
(133) 0x4201de MOV 0xd0(%RSP),%R11 |
(133) 0x4201e6 MOV 0xd8(%RSP),%RDI |
(133) 0x4201ee ADD %RAX,%RBX |
(133) 0x4201f1 VMOVSD (%RDX),%XMM1 |
(133) 0x4201f5 MOV 0xa0(%RSP),%R8 |
(133) 0x4201fd ADD %RAX,%R11 |
(133) 0x420200 LEA (%R13,%RBX,8),%RBX |
(133) 0x420205 LEA (%R15,%R11,8),%R11 |
(133) 0x420209 VMOVSD (%R11),%XMM8 |
(133) 0x42020e VFMADD231SD (%RSI),%XMM11,%XMM10 |
(133) 0x420213 LEA (%R9,%RAX,1),%RSI |
(133) 0x420217 VMULSD (%RBX),%XMM8,%XMM5 |
(133) 0x42021b VFMADD132SD (%R10),%XMM5,%XMM1 |
(133) 0x420220 VADDSD %XMM1,%XMM10,%XMM12 |
(133) 0x420224 VMULSD %XMM3,%XMM12,%XMM13 |
(133) 0x420228 VMOVSD %XMM13,(%RDI,%RSI,8) |
(133) 0x42022d MOV 0xf0(%RSP),%RDI |
(133) 0x420235 LEA (%R8,%RAX,1),%RSI |
(133) 0x420239 MOV 0xe0(%RSP),%R8 |
(133) 0x420241 LEA (%RDI,%RAX,1),%RDI |
(133) 0x420245 ADD %R8,%RAX |
(133) 0x420248 VMOVSD (%R14,%RDI,8),%XMM14 |
(133) 0x42024e MOV 0xfc(%RSP),%EDI |
(133) 0x420255 VSUBSD (%R14,%RAX,8),%XMM14,%XMM15 |
(133) 0x42025b MOV 0xe8(%RSP),%RAX |
(133) 0x420263 ADD $0x2,%EDI |
(133) 0x420266 VADDSD %XMM13,%XMM15,%XMM6 |
(133) 0x42026b VMOVSD %XMM6,(%RAX,%RSI,8) |
(133) 0x420270 MOV 0xa8(%RSP),%ESI |
(133) 0x420277 CMP %ESI,%EDI |
(133) 0x420279 JAE 42032d |
(133) 0x42027f MOV 0xc8(%RSP),%RDI |
(133) 0x420287 MOVSXD %ECX,%RCX |
(133) 0x42028a MOV 0xd0(%RSP),%R8 |
(133) 0x420292 VMOVSD (%RBX),%XMM5 |
(133) 0x420296 MOV 0xc0(%RSP),%RAX |
(133) 0x42029e ADD %RCX,%R12 |
(133) 0x4202a1 ADD %RCX,%R9 |
(133) 0x4202a4 ADD %RCX,%RDI |
(133) 0x4202a7 ADD %RCX,%R8 |
(133) 0x4202aa VMOVSD (%R13,%R12,8),%XMM7 |
(133) 0x4202b1 VMOVSD (%R10),%XMM9 |
(133) 0x4202b6 VMOVSD (%R13,%RDI,8),%XMM0 |
(133) 0x4202bd VMULSD (%R11),%XMM5,%XMM1 |
(133) 0x4202c2 ADD %RCX,%RAX |
(133) 0x4202c5 MOV 0xd8(%RSP),%R13 |
(133) 0x4202cd MOV 0xf0(%RSP),%R10 |
(133) 0x4202d5 VMULSD (%R15,%R8,8),%XMM0,%XMM8 |
(133) 0x4202db ADD %RCX,%R10 |
(133) 0x4202de VFMADD231SD (%RDX),%XMM9,%XMM1 |
(133) 0x4202e3 MOV 0xe0(%RSP),%RDX |
(133) 0x4202eb VFMADD231SD (%R15,%RAX,8),%XMM7,%XMM8 |
(133) 0x4202f1 ADD %RCX,%RDX |
(133) 0x4202f4 MOV 0xa0(%RSP),%R15 |
(133) 0x4202fc ADD %RCX,%R15 |
(133) 0x4202ff VADDSD %XMM1,%XMM8,%XMM10 |
(133) 0x420303 VMULSD %XMM3,%XMM10,%XMM11 |
(133) 0x420307 VMOVSD %XMM11,(%R13,%R9,8) |
(133) 0x42030e VMOVSD (%R14,%R10,8),%XMM12 |
(133) 0x420314 VSUBSD (%R14,%RDX,8),%XMM12,%XMM13 |
(133) 0x42031a MOV 0xe8(%RSP),%R14 |
(133) 0x420322 VADDSD %XMM11,%XMM13,%XMM14 |
(133) 0x420327 VMOVSD %XMM14,(%R14,%R15,8) |
(133) 0x42032d MOV 0xa8(%RSP),%R11D |
(133) 0x420335 MOV %R11D,0xfc(%RSP) |
(133) 0x42033d INCL 0xac(%RSP) |
(133) 0x420344 INCQ 0x88(%RSP) |
(133) 0x42034c MOV 0xac(%RSP),%ESI |
(133) 0x420353 CMP %ESI,0x68(%RSP) |
(133) 0x420357 JLE 420380 |
(133) 0x420359 MOV 0x60(%RSP),%ECX |
(133) 0x42035d MOV 0xfc(%RSP),%R12D |
(133) 0x420365 MOV 0x64(%RSP),%EAX |
(133) 0x420369 MOV 0x6c(%RSP),%R8D |
(133) 0x42036e SUB %R12D,%ECX |
(133) 0x420371 MOV %EAX,0xf8(%RSP) |
(133) 0x420378 JMP 41fbe0 |
0x42037d NOPL (%RAX) |
0x420380 VZEROUPPER |
0x420383 LEA -0x28(%RBP),%RSP |
0x420387 POP %RBX |
0x420388 POP %R12 |
0x42038a POP %R13 |
0x42038c POP %R14 |
0x42038e POP %R15 |
0x420390 POP %RBP |
0x420391 RET |
0x420392 NOPW (%RAX,%RAX,1) |
(133) 0x420398 MOV 0xf8(%RSP),%ECX |
(133) 0x42039f XOR %EDX,%EDX |
(133) 0x4203a1 JMP 41ff73 |
0x4203a6 INC %ECX |
0x4203a8 XOR %EDX,%EDX |
0x4203aa JMP 41fb43 |
0x4203af NOP |
Path / |
Source file and lines | advec_mom.cpp:167-172 |
Module | exec |
nb instructions | 87 |
nb uops | 97 |
loop length | 325 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 1 |
used ymm registers | 1 |
used zmm registers | 1 |
nb stack references | 14 |
micro-operation queue | 16.17 cycles |
front end | 16.17 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 6.30 | 8.00 | 6.67 | 6.67 | 10.50 | 6.20 | 6.30 | 10.50 | 10.50 | 10.50 | 6.20 | 6.67 |
cycles | 6.30 | 11.90 | 6.67 | 6.67 | 10.50 | 6.20 | 6.30 | 10.50 | 10.50 | 10.50 | 6.20 | 6.67 |
Cycles executing div or sqrt instructions | 12.00 |
FE+BE cycles | 15.43-15.52 |
Stall cycles | 0.00 |
Front-end | 16.17 |
Dispatch | 11.90 |
DIV/SQRT | 12.00 |
Overall L1 | 16.17 |
all | 3% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 10% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 3% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 10% |
all | 8% |
load | 8% |
store | 9% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 8% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
all | 9% |
load | 9% |
store | 9% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 10% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x40,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0x100,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x34(%RDI),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x28(%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x30(%RDI),%R12D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x2c(%RDI),%EDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD $0x4,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA 0x1(%RSI),%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %EAX,0x68(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %ECX,0x64(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %EAX,%R12D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 420383 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.9+0x8c3> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %EAX,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0x3(%RDX),%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB %R12D,%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %R15D,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 420383 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.9+0x8c3> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RDI,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R15D,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %ECX,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %EDI,0x6c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 4046c0 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %EAX,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 4045b0 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %EAX,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x6c(%RSP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
IMUL %R14D,%EAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
DIV %R13D | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
MOV %EAX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %EDX,%R8D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB 4203a6 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.9+0x8e6> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %ECX,%R8D | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%R8,%RDX,1),%R10D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
LEA (%RCX,%R10,1),%R9D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %R9D,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %R9D,%R10D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 420383 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.9+0x8c3> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R10D,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x64(%RSP),%R11D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RBX),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
DIVL 0x6c(%RSP) | 5 | 0 | 3 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 11-16 | 6 |
MOV 0x10(%RBX),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RBX),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R10D,0xfc(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x2c9f2(%RIP),%XMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RSI,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R14,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDI,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R15D,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x20(%RBX),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RBX),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VBROADCASTSD %XMM3,%YMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM3,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %R15,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RBX,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD %R12D,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %EDX,%R11D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %EAX,0xac(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CLTQ | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
SUB %R11D,%R8D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R11D,0xf8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RAX,0x88(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
INC %ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 41fb43 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.9+0x83> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | advec_mom.cpp:167-172 |
Module | exec |
nb instructions | 87 |
nb uops | 97 |
loop length | 325 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 1 |
used ymm registers | 1 |
used zmm registers | 1 |
nb stack references | 14 |
micro-operation queue | 16.17 cycles |
front end | 16.17 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 6.30 | 8.00 | 6.67 | 6.67 | 10.50 | 6.20 | 6.30 | 10.50 | 10.50 | 10.50 | 6.20 | 6.67 |
cycles | 6.30 | 11.90 | 6.67 | 6.67 | 10.50 | 6.20 | 6.30 | 10.50 | 10.50 | 10.50 | 6.20 | 6.67 |
Cycles executing div or sqrt instructions | 12.00 |
FE+BE cycles | 15.43-15.52 |
Stall cycles | 0.00 |
Front-end | 16.17 |
Dispatch | 11.90 |
DIV/SQRT | 12.00 |
Overall L1 | 16.17 |
all | 3% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 10% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 3% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 10% |
all | 8% |
load | 8% |
store | 9% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 8% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
all | 9% |
load | 9% |
store | 9% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 10% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x40,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0x100,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x34(%RDI),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x28(%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x30(%RDI),%R12D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x2c(%RDI),%EDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD $0x4,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA 0x1(%RSI),%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %EAX,0x68(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %ECX,0x64(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %EAX,%R12D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 420383 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.9+0x8c3> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %EAX,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0x3(%RDX),%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB %R12D,%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %R15D,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 420383 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.9+0x8c3> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RDI,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R15D,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %ECX,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %EDI,0x6c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 4046c0 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %EAX,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 4045b0 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %EAX,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x6c(%RSP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
IMUL %R14D,%EAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
DIV %R13D | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
MOV %EAX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %EDX,%R8D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB 4203a6 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.9+0x8e6> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %ECX,%R8D | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%R8,%RDX,1),%R10D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
LEA (%RCX,%R10,1),%R9D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %R9D,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %R9D,%R10D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 420383 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.9+0x8c3> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R10D,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x64(%RSP),%R11D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RBX),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
DIVL 0x6c(%RSP) | 5 | 0 | 3 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 11-16 | 6 |
MOV 0x10(%RBX),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RBX),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R10D,0xfc(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x2c9f2(%RIP),%XMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RSI,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R14,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDI,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R15D,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x20(%RBX),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RBX),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VBROADCASTSD %XMM3,%YMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM3,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %R15,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RBX,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD %R12D,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %EDX,%R11D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %EAX,0xac(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CLTQ | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
SUB %R11D,%R8D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R11D,0xf8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RAX,0x88(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
INC %ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 41fb43 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.9+0x83> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼advec_mom_kernel(int, int, int, int, clover::Buffer2D | 2.73 | 0.87 |
▼Loop 133 - advec_mom.cpp:169-172 - exec– | 0.01 | 0 |
○Loop 134 - advec_mom.cpp:170-172 - exec | 2.73 | 0.87 |