Function: _Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buf ... | Module: exec | Source: advec_mom.cpp:218-221 [...] | Coverage: 4.48% |
---|
Function: _Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buf ... | Module: exec | Source: advec_mom.cpp:218-221 [...] | Coverage: 4.48% |
---|
/home/eoseret/qaas_runs_CPU_9468/171-145-9336/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/advec_mom.cpp: 218 - 221 |
-------------------------------------------------------------------------------- |
218: #pragma omp parallel for simd collapse(2) |
219: for (int j = (y_min + 1); j < (y_max + 1 + 2); j++) { |
220: for (int i = (x_min + 1); i < (x_max + 1 + 2); i++) { |
221: vel1(i, j) = (vel1(i, j) * node_mass_pre(i, j) + mom_flux(i + 0, j - 1) - mom_flux(i, j)) / node_mass_post(i, j); |
/home/eoseret/qaas_runs_CPU_9468/171-145-9336/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/context.h: 69 - 69 |
-------------------------------------------------------------------------------- |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
0x42dd00 PUSH %RBP |
0x42dd01 MOV %RSP,%RBP |
0x42dd04 PUSH %R15 |
0x42dd06 PUSH %R14 |
0x42dd08 PUSH %R13 |
0x42dd0a PUSH %R12 |
0x42dd0c PUSH %RBX |
0x42dd0d AND $-0x40,%RSP |
0x42dd11 ADD $-0x80,%RSP |
0x42dd15 MOV 0x28(%RDI),%EAX |
0x42dd18 MOV 0x2c(%RDI),%EDX |
0x42dd1b MOV 0x20(%RDI),%ESI |
0x42dd1e MOV 0x24(%RDI),%EBX |
0x42dd21 ADD $0x3,%EDX |
0x42dd24 LEA 0x1(%RAX),%R15D |
0x42dd28 INC %ESI |
0x42dd2a MOV %EDX,0x3c(%RSP) |
0x42dd2e MOV %ESI,0x38(%RSP) |
0x42dd32 CMP %EDX,%R15D |
0x42dd35 JGE 42e203 |
0x42dd3b LEA 0x3(%RBX),%R14D |
0x42dd3f MOV %EDX,%EBX |
0x42dd41 SUB %R15D,%EBX |
0x42dd44 CMP %R14D,%ESI |
0x42dd47 JGE 42e203 |
0x42dd4d MOV %R14D,%ECX |
0x42dd50 MOV %RDI,%R12 |
0x42dd53 SUB %ESI,%ECX |
0x42dd55 MOV %ECX,0x58(%RSP) |
0x42dd59 CALL 4046c0 <omp_get_num_threads@plt> |
0x42dd5e MOV %EAX,%R13D |
0x42dd61 CALL 4045b0 <omp_get_thread_num@plt> |
0x42dd66 XOR %EDX,%EDX |
0x42dd68 MOV %EAX,%EDI |
0x42dd6a MOV 0x58(%RSP),%EAX |
0x42dd6e IMUL %EBX,%EAX |
0x42dd71 DIV %R13D |
0x42dd74 MOV %EAX,%ECX |
0x42dd76 CMP %EDX,%EDI |
0x42dd78 JB 42e223 |
0x42dd7e IMUL %ECX,%EDI |
0x42dd81 LEA (%RDI,%RDX,1),%R11D |
0x42dd85 LEA (%RCX,%R11,1),%R8D |
0x42dd89 MOV %R8D,0x34(%RSP) |
0x42dd8e CMP %R8D,%R11D |
0x42dd91 JAE 42e203 |
0x42dd97 MOV %R11D,%EAX |
0x42dd9a XOR %EDX,%EDX |
0x42dd9c MOV 0x38(%RSP),%R9D |
0x42dda1 MOV (%R12),%R10 |
0x42dda5 DIVL 0x58(%RSP) |
0x42dda9 MOV 0x18(%R12),%RSI |
0x42ddae MOV %R10,0x28(%RSP) |
0x42ddb3 MOV %RSI,0x18(%RSP) |
0x42ddb8 ADD %EDX,%R9D |
0x42ddbb ADD %R15D,%EAX |
0x42ddbe MOV %R14D,%EDX |
0x42ddc1 MOV 0x10(%R12),%R15 |
0x42ddc6 MOV 0x8(%R12),%R14 |
0x42ddcb MOV %R9D,0x74(%RSP) |
0x42ddd0 SUB %R9D,%EDX |
0x42ddd3 MOVSXD %EAX,%R12 |
0x42ddd6 MOV %EAX,0x70(%RSP) |
0x42ddda MOV %R15,0x20(%RSP) |
0x42dddf MOV %R14,0x10(%RSP) |
0x42dde4 NOPL (%RAX) |
(148) 0x42dde8 CMP %EDX,%ECX |
(148) 0x42ddea CMOVBE %ECX,%EDX |
(148) 0x42dded LEA (%R11,%RDX,1),%EBX |
(148) 0x42ddf1 MOV %EBX,0x5c(%RSP) |
(148) 0x42ddf5 CMP %EBX,%R11D |
(148) 0x42ddf8 JAE 42e1d5 |
(148) 0x42ddfe MOV 0x20(%RSP),%RDI |
(148) 0x42de03 MOV 0x28(%RSP),%RCX |
(148) 0x42de08 MOV 0x18(%RSP),%R8 |
(148) 0x42de0d MOV 0x70(%RSP),%EAX |
(148) 0x42de11 MOV (%RDI),%RBX |
(148) 0x42de14 MOV 0x10(%RDI),%R14 |
(148) 0x42de18 MOV 0x10(%RSP),%RDI |
(148) 0x42de1d MOV (%R8),%R9 |
(148) 0x42de20 DEC %EAX |
(148) 0x42de22 MOV (%RCX),%R13 |
(148) 0x42de25 MOV 0x10(%R8),%RSI |
(148) 0x42de29 MOVSXD %EAX,%R10 |
(148) 0x42de2c IMUL %R12,%RBX |
(148) 0x42de30 MOV (%RDI),%R8 |
(148) 0x42de33 IMUL %R9,%R10 |
(148) 0x42de37 MOV 0x10(%RCX),%R15 |
(148) 0x42de3b LEA -0x1(%RDX),%EAX |
(148) 0x42de3e IMUL %R12,%R13 |
(148) 0x42de42 MOV 0x10(%RDI),%RCX |
(148) 0x42de46 IMUL %R12,%R9 |
(148) 0x42de4a MOV %RBX,0x48(%RSP) |
(148) 0x42de4f IMUL %R12,%R8 |
(148) 0x42de53 MOV %R10,0x50(%RSP) |
(148) 0x42de58 MOV %R13,0x40(%RSP) |
(148) 0x42de5d MOV %R9,0x68(%RSP) |
(148) 0x42de62 MOV %RCX,0x78(%RSP) |
(148) 0x42de67 MOV %R8,0x60(%RSP) |
(148) 0x42de6c CMP $0x6,%EAX |
(148) 0x42de6f JBE 42e218 |
(148) 0x42de75 MOVSXD 0x74(%RSP),%RAX |
(148) 0x42de7a LEA (%R13,%RAX,1),%R13 |
(148) 0x42de7f ADD %RAX,%RBX |
(148) 0x42de82 LEA (%R9,%RAX,1),%R9 |
(148) 0x42de86 LEA (%R15,%R13,8),%RCX |
(148) 0x42de8a LEA (%R14,%RBX,8),%R13 |
(148) 0x42de8e LEA (%R10,%RAX,1),%RBX |
(148) 0x42de92 ADD %R8,%RAX |
(148) 0x42de95 MOV 0x78(%RSP),%R8 |
(148) 0x42de9a LEA (%RSI,%RBX,8),%R10 |
(148) 0x42de9e LEA (%RSI,%R9,8),%RBX |
(148) 0x42dea2 LEA (%R8,%RAX,8),%R9 |
(148) 0x42dea6 MOV %EDX,%R8D |
(148) 0x42dea9 XOR %EAX,%EAX |
(148) 0x42deab SHR $0x3,%R8D |
(148) 0x42deaf SAL $0x6,%R8 |
(148) 0x42deb3 LEA -0x40(%R8),%RDI |
(148) 0x42deb7 SHR $0x6,%RDI |
(148) 0x42debb INC %RDI |
(148) 0x42debe AND $0x3,%EDI |
(148) 0x42dec1 JE 42df64 |
(148) 0x42dec7 CMP $0x1,%RDI |
(148) 0x42decb JE 42df2c |
(148) 0x42decd CMP $0x2,%RDI |
(148) 0x42ded1 JE 42defd |
(148) 0x42ded3 VMOVUPD (%R13),%ZMM0 |
(148) 0x42deda VMOVUPD (%RBX),%ZMM3 |
(148) 0x42dee0 MOV $0x40,%EAX |
(148) 0x42dee5 VFMSUB132PD (%RCX),%ZMM3,%ZMM0 |
(148) 0x42deeb VADDPD (%R10),%ZMM0,%ZMM1 |
(148) 0x42def1 VDIVPD (%R9),%ZMM1,%ZMM2 |
(148) 0x42def7 VMOVUPD %ZMM2,(%RCX) |
(148) 0x42defd VMOVUPD (%R13,%RAX,1),%ZMM4 |
(148) 0x42df05 VMOVUPD (%RBX,%RAX,1),%ZMM5 |
(148) 0x42df0c VFMSUB132PD (%RCX,%RAX,1),%ZMM5,%ZMM4 |
(148) 0x42df13 VADDPD (%R10,%RAX,1),%ZMM4,%ZMM6 |
(148) 0x42df1a VDIVPD (%R9,%RAX,1),%ZMM6,%ZMM7 |
(148) 0x42df21 VMOVUPD %ZMM7,(%RCX,%RAX,1) |
(148) 0x42df28 ADD $0x40,%RAX |
(148) 0x42df2c VMOVUPD (%R13,%RAX,1),%ZMM8 |
(148) 0x42df34 VMOVUPD (%RBX,%RAX,1),%ZMM9 |
(148) 0x42df3b VFMSUB132PD (%RCX,%RAX,1),%ZMM9,%ZMM8 |
(148) 0x42df42 VADDPD (%R10,%RAX,1),%ZMM8,%ZMM10 |
(148) 0x42df49 VDIVPD (%R9,%RAX,1),%ZMM10,%ZMM11 |
(148) 0x42df50 VMOVUPD %ZMM11,(%RCX,%RAX,1) |
(148) 0x42df57 ADD $0x40,%RAX |
(148) 0x42df5b CMP %RAX,%R8 |
(148) 0x42df5e JE 42e02e |
(149) 0x42df64 VMOVUPD (%R13,%RAX,1),%ZMM12 |
(149) 0x42df6c VMOVUPD (%RBX,%RAX,1),%ZMM13 |
(149) 0x42df73 VFMSUB132PD (%RCX,%RAX,1),%ZMM13,%ZMM12 |
(149) 0x42df7a VADDPD (%R10,%RAX,1),%ZMM12,%ZMM14 |
(149) 0x42df81 VDIVPD (%R9,%RAX,1),%ZMM14,%ZMM15 |
(149) 0x42df88 VMOVUPD %ZMM15,(%RCX,%RAX,1) |
(149) 0x42df8f VMOVUPD 0x40(%R13,%RAX,1),%ZMM0 |
(149) 0x42df97 VMOVUPD 0x40(%RBX,%RAX,1),%ZMM3 |
(149) 0x42df9f VFMSUB132PD 0x40(%RCX,%RAX,1),%ZMM3,%ZMM0 |
(149) 0x42dfa7 VADDPD 0x40(%R10,%RAX,1),%ZMM0,%ZMM1 |
(149) 0x42dfaf VDIVPD 0x40(%R9,%RAX,1),%ZMM1,%ZMM2 |
(149) 0x42dfb7 VMOVUPD %ZMM2,0x40(%RCX,%RAX,1) |
(149) 0x42dfbf VMOVUPD 0x80(%R13,%RAX,1),%ZMM5 |
(149) 0x42dfc7 VMOVUPD 0x80(%RBX,%RAX,1),%ZMM4 |
(149) 0x42dfcf VFMSUB132PD 0x80(%RCX,%RAX,1),%ZMM4,%ZMM5 |
(149) 0x42dfd7 VADDPD 0x80(%R10,%RAX,1),%ZMM5,%ZMM6 |
(149) 0x42dfdf VDIVPD 0x80(%R9,%RAX,1),%ZMM6,%ZMM7 |
(149) 0x42dfe7 VMOVUPD %ZMM7,0x80(%RCX,%RAX,1) |
(149) 0x42dfef VMOVUPD 0xc0(%R13,%RAX,1),%ZMM8 |
(149) 0x42dff7 VMOVUPD 0xc0(%RBX,%RAX,1),%ZMM9 |
(149) 0x42dfff VFMSUB132PD 0xc0(%RCX,%RAX,1),%ZMM9,%ZMM8 |
(149) 0x42e007 VADDPD 0xc0(%R10,%RAX,1),%ZMM8,%ZMM10 |
(149) 0x42e00f VDIVPD 0xc0(%R9,%RAX,1),%ZMM10,%ZMM11 |
(149) 0x42e017 VMOVUPD %ZMM11,0xc0(%RCX,%RAX,1) |
(149) 0x42e01f ADD $0x100,%RAX |
(149) 0x42e025 CMP %RAX,%R8 |
(149) 0x42e028 JNE 42df64 |
(148) 0x42e02e MOV 0x74(%RSP),%R13D |
(148) 0x42e033 MOV %EDX,%ECX |
(148) 0x42e035 AND $-0x8,%ECX |
(148) 0x42e038 ADD %ECX,%R11D |
(148) 0x42e03b LEA (%RCX,%R13,1),%EDI |
(148) 0x42e03f TEST $0x7,%DL |
(148) 0x42e042 JE 42e1d0 |
(148) 0x42e048 SUB %ECX,%EDX |
(148) 0x42e04a LEA -0x1(%RDX),%R10D |
(148) 0x42e04e CMP $0x2,%R10D |
(148) 0x42e052 JBE 42e0cb |
(148) 0x42e054 MOVSXD 0x74(%RSP),%RAX |
(148) 0x42e059 MOV 0x40(%RSP),%RBX |
(148) 0x42e05e MOV 0x60(%RSP),%R8 |
(148) 0x42e063 MOV 0x48(%RSP),%R10 |
(148) 0x42e068 LEA (%RBX,%RAX,1),%R9 |
(148) 0x42e06c MOV 0x68(%RSP),%R13 |
(148) 0x42e071 ADD %RCX,%R9 |
(148) 0x42e074 LEA (%R8,%RAX,1),%R8 |
(148) 0x42e078 ADD %RAX,%R10 |
(148) 0x42e07b LEA (%R15,%R9,8),%RBX |
(148) 0x42e07f MOV 0x50(%RSP),%R9 |
(148) 0x42e084 ADD %RCX,%R10 |
(148) 0x42e087 ADD %RCX,%R8 |
(148) 0x42e08a VMOVUPD (%RBX),%YMM12 |
(148) 0x42e08e ADD %RAX,%R9 |
(148) 0x42e091 ADD %R13,%RAX |
(148) 0x42e094 ADD %RCX,%RAX |
(148) 0x42e097 ADD %RCX,%R9 |
(148) 0x42e09a MOV 0x78(%RSP),%RCX |
(148) 0x42e09f VMOVUPD (%RSI,%RAX,8),%YMM13 |
(148) 0x42e0a4 VFMSUB132PD (%R14,%R10,8),%YMM13,%YMM12 |
(148) 0x42e0aa VADDPD (%RSI,%R9,8),%YMM12,%YMM14 |
(148) 0x42e0b0 VDIVPD (%RCX,%R8,8),%YMM14,%YMM15 |
(148) 0x42e0b6 VMOVUPD %YMM15,(%RBX) |
(148) 0x42e0ba TEST $0x3,%DL |
(148) 0x42e0bd JE 42e1d0 |
(148) 0x42e0c3 AND $-0x4,%EDX |
(148) 0x42e0c6 ADD %EDX,%R11D |
(148) 0x42e0c9 ADD %EDX,%EDI |
(148) 0x42e0cb MOV 0x48(%RSP),%R13 |
(148) 0x42e0d0 MOV 0x68(%RSP),%R8 |
(148) 0x42e0d5 MOVSXD %EDI,%RAX |
(148) 0x42e0d8 MOV 0x40(%RSP),%RBX |
(148) 0x42e0dd MOV 0x50(%RSP),%R10 |
(148) 0x42e0e2 LEA (%R13,%RAX,1),%R9 |
(148) 0x42e0e7 ADD %RAX,%R8 |
(148) 0x42e0ea VMOVSD (%R14,%R9,8),%XMM0 |
(148) 0x42e0f0 VMOVSD (%RSI,%R8,8),%XMM3 |
(148) 0x42e0f6 LEA (%RBX,%RAX,1),%RDX |
(148) 0x42e0fa LEA (%R10,%RAX,1),%RCX |
(148) 0x42e0fe LEA (%R15,%RDX,8),%RDX |
(148) 0x42e102 MOV 0x78(%RSP),%R9 |
(148) 0x42e107 MOV 0x5c(%RSP),%R8D |
(148) 0x42e10c VFMSUB132SD (%RDX),%XMM3,%XMM0 |
(148) 0x42e111 VADDSD (%RSI,%RCX,8),%XMM0,%XMM1 |
(148) 0x42e116 MOV 0x60(%RSP),%RCX |
(148) 0x42e11b ADD %RCX,%RAX |
(148) 0x42e11e VDIVSD (%R9,%RAX,8),%XMM1,%XMM2 |
(148) 0x42e124 VMOVSD %XMM2,(%RDX) |
(148) 0x42e128 LEA 0x1(%R11),%EDX |
(148) 0x42e12c LEA 0x1(%RDI),%EAX |
(148) 0x42e12f CMP %R8D,%EDX |
(148) 0x42e132 JAE 42e1d0 |
(148) 0x42e138 MOV 0x68(%RSP),%R8 |
(148) 0x42e13d CLTQ |
(148) 0x42e13f ADD $0x2,%R11D |
(148) 0x42e143 ADD $0x2,%EDI |
(148) 0x42e146 LEA (%R13,%RAX,1),%R9 |
(148) 0x42e14b LEA (%RBX,%RAX,1),%RCX |
(148) 0x42e14f ADD %RAX,%R8 |
(148) 0x42e152 VMOVSD (%R14,%R9,8),%XMM5 |
(148) 0x42e158 LEA (%R15,%RCX,8),%RDX |
(148) 0x42e15c LEA (%R10,%RAX,1),%RCX |
(148) 0x42e160 VMOVSD (%RSI,%R8,8),%XMM4 |
(148) 0x42e166 MOV 0x60(%RSP),%R9 |
(148) 0x42e16b VFMSUB132SD (%RDX),%XMM4,%XMM5 |
(148) 0x42e170 ADD %R9,%RAX |
(148) 0x42e173 VADDSD (%RSI,%RCX,8),%XMM5,%XMM6 |
(148) 0x42e178 MOV 0x78(%RSP),%RCX |
(148) 0x42e17d VDIVSD (%RCX,%RAX,8),%XMM6,%XMM7 |
(148) 0x42e182 MOV 0x5c(%RSP),%EAX |
(148) 0x42e186 VMOVSD %XMM7,(%RDX) |
(148) 0x42e18a CMP %EAX,%R11D |
(148) 0x42e18d JAE 42e1d0 |
(148) 0x42e18f MOVSXD %EDI,%R11 |
(148) 0x42e192 MOV 0x68(%RSP),%RDI |
(148) 0x42e197 ADD %R11,%R13 |
(148) 0x42e19a ADD %R11,%RBX |
(148) 0x42e19d ADD %R11,%R10 |
(148) 0x42e1a0 ADD %R11,%R9 |
(148) 0x42e1a3 ADD %R11,%RDI |
(148) 0x42e1a6 VMOVSD (%R14,%R13,8),%XMM8 |
(148) 0x42e1ac LEA (%R15,%RBX,8),%R15 |
(148) 0x42e1b0 MOV 0x78(%RSP),%R14 |
(148) 0x42e1b5 VMOVSD (%RSI,%RDI,8),%XMM9 |
(148) 0x42e1ba VFMSUB132SD (%R15),%XMM9,%XMM8 |
(148) 0x42e1bf VADDSD (%RSI,%R10,8),%XMM8,%XMM10 |
(148) 0x42e1c5 VDIVSD (%R14,%R9,8),%XMM10,%XMM11 |
(148) 0x42e1cb VMOVSD %XMM11,(%R15) |
(148) 0x42e1d0 MOV 0x5c(%RSP),%R11D |
(148) 0x42e1d5 INCL 0x70(%RSP) |
(148) 0x42e1d9 INC %R12 |
(148) 0x42e1dc MOV 0x70(%RSP),%ESI |
(148) 0x42e1e0 CMP %ESI,0x3c(%RSP) |
(148) 0x42e1e4 JLE 42e200 |
(148) 0x42e1e6 MOV 0x34(%RSP),%ECX |
(148) 0x42e1ea MOV 0x38(%RSP),%R8D |
(148) 0x42e1ef MOV 0x58(%RSP),%EDX |
(148) 0x42e1f3 MOV %R8D,0x74(%RSP) |
(148) 0x42e1f8 SUB %R11D,%ECX |
(148) 0x42e1fb JMP 42dde8 |
0x42e200 VZEROUPPER |
0x42e203 LEA -0x28(%RBP),%RSP |
0x42e207 POP %RBX |
0x42e208 POP %R12 |
0x42e20a POP %R13 |
0x42e20c POP %R14 |
0x42e20e POP %R15 |
0x42e210 POP %RBP |
0x42e211 RET |
0x42e212 NOPW (%RAX,%RAX,1) |
(148) 0x42e218 MOV 0x74(%RSP),%EDI |
(148) 0x42e21c XOR %ECX,%ECX |
(148) 0x42e21e JMP 42e048 |
0x42e223 INC %ECX |
0x42e225 XOR %EDX,%EDX |
0x42e227 JMP 42dd7e |
0x42e22c NOPL (%RAX) |
Path / |
Source file and lines | advec_mom.cpp:218-221 |
Module | exec |
nb instructions | 80 |
nb uops | 90 |
loop length | 269 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 11 |
micro-operation queue | 15.00 cycles |
front end | 15.00 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 5.70 | 8.00 | 6.00 | 6.00 | 9.00 | 5.87 | 5.70 | 9.00 | 9.00 | 9.00 | 5.73 | 6.00 |
cycles | 5.70 | 11.73 | 6.00 | 6.00 | 9.00 | 5.87 | 5.70 | 9.00 | 9.00 | 9.00 | 5.73 | 6.00 |
Cycles executing div or sqrt instructions | 12.00 |
FE+BE cycles | 14.26-15.25 |
Stall cycles | 0.00-0.65 |
Front-end | 15.00 |
Dispatch | 11.73 |
DIV/SQRT | 12.00 |
Overall L1 | 15.00 |
all | 3% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 16% |
all | 8% |
load | 9% |
store | 8% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 9% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x40,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
ADD $-0x80,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x28(%RDI),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x2c(%RDI),%EDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x24(%RDI),%EBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD $0x3,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA 0x1(%RAX),%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
INC %ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %EDX,0x3c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %ESI,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %EDX,%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 42e203 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.11+0x503> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA 0x3(%RBX),%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %EDX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %R15D,%EBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %R14D,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 42e203 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.11+0x503> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R14D,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDI,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %ESI,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %ECX,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 4046c0 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %EAX,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 4045b0 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %EAX,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x58(%RSP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
IMUL %EBX,%EAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
DIV %R13D | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
MOV %EAX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %EDX,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB 42e223 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.11+0x523> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %ECX,%EDI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%RDI,%RDX,1),%R11D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
LEA (%RCX,%R11,1),%R8D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %R8D,0x34(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %R8D,%R11D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 42e203 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.11+0x503> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R11D,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x38(%RSP),%R9D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R12),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
DIVL 0x58(%RSP) | 5 | 0 | 3 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 11-16 | 6 |
MOV 0x18(%R12),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R10,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RSI,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD %EDX,%R9D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %R15D,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R14D,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x10(%R12),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%R12),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R9D,0x74(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB %R9D,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOVSXD %EAX,%R12 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV %EAX,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R15,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R14,0x10(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
INC %ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 42dd7e <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.11+0x7e> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | advec_mom.cpp:218-221 |
Module | exec |
nb instructions | 80 |
nb uops | 90 |
loop length | 269 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 11 |
micro-operation queue | 15.00 cycles |
front end | 15.00 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 5.70 | 8.00 | 6.00 | 6.00 | 9.00 | 5.87 | 5.70 | 9.00 | 9.00 | 9.00 | 5.73 | 6.00 |
cycles | 5.70 | 11.73 | 6.00 | 6.00 | 9.00 | 5.87 | 5.70 | 9.00 | 9.00 | 9.00 | 5.73 | 6.00 |
Cycles executing div or sqrt instructions | 12.00 |
FE+BE cycles | 14.26-15.25 |
Stall cycles | 0.00-0.65 |
Front-end | 15.00 |
Dispatch | 11.73 |
DIV/SQRT | 12.00 |
Overall L1 | 15.00 |
all | 3% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 16% |
all | 8% |
load | 9% |
store | 8% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 9% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x40,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
ADD $-0x80,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x28(%RDI),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x2c(%RDI),%EDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x24(%RDI),%EBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD $0x3,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA 0x1(%RAX),%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
INC %ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %EDX,0x3c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %ESI,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %EDX,%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 42e203 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.11+0x503> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA 0x3(%RBX),%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %EDX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %R15D,%EBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %R14D,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 42e203 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.11+0x503> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R14D,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDI,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %ESI,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %ECX,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 4046c0 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %EAX,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 4045b0 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %EAX,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x58(%RSP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
IMUL %EBX,%EAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
DIV %R13D | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
MOV %EAX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %EDX,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB 42e223 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.11+0x523> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %ECX,%EDI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%RDI,%RDX,1),%R11D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
LEA (%RCX,%R11,1),%R8D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %R8D,0x34(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %R8D,%R11D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 42e203 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.11+0x503> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R11D,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x38(%RSP),%R9D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R12),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
DIVL 0x58(%RSP) | 5 | 0 | 3 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 11-16 | 6 |
MOV 0x18(%R12),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R10,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RSI,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD %EDX,%R9D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %R15D,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R14D,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x10(%R12),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%R12),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R9D,0x74(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB %R9D,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOVSXD %EAX,%R12 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV %EAX,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R15,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R14,0x10(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
INC %ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 42dd7e <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.11+0x7e> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.11– | 4.48 | 3.35 |
▼Loop 148 - advec_mom.cpp:220-221 - exec– | 0 | 0 |
○Loop 149 - advec_mom.cpp:221-221 - exec | 4.48 | 3.34 |