Function: _Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buf ... | Module: exec | Source: advec_mom.cpp:167-172 [...] | Coverage: 3.08% |
---|
Function: _Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buf ... | Module: exec | Source: advec_mom.cpp:167-172 [...] | Coverage: 3.08% |
---|
/home/eoseret/qaas_runs_CPU_9468/171-145-9336/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/advec_mom.cpp: 167 - 172 |
-------------------------------------------------------------------------------- |
167: #pragma omp parallel for simd collapse(2) |
168: for (int j = (y_min - 1 + 1); j < (y_max + 2 + 2); j++) { |
169: for (int i = (x_min + 1); i < (x_max + 1 + 2); i++) { |
170: node_mass_post(i, j) = 0.25 * (density1(i + 0, j - 1) * post_vol(i + 0, j - 1) + density1(i, j) * post_vol(i, j) + |
171: density1(i - 1, j - 1) * post_vol(i - 1, j - 1) + density1(i - 1, j + 0) * post_vol(i - 1, j + 0)); |
172: node_mass_pre(i, j) = node_mass_post(i, j) - node_flux(i + 0, j - 1) + node_flux(i, j); |
/home/eoseret/qaas_runs_CPU_9468/171-145-9336/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/context.h: 69 - 69 |
-------------------------------------------------------------------------------- |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
0x42c9b0 PUSH %RBP |
0x42c9b1 MOV %RSP,%RBP |
0x42c9b4 PUSH %R15 |
0x42c9b6 PUSH %R14 |
0x42c9b8 PUSH %R13 |
0x42c9ba PUSH %R12 |
0x42c9bc PUSH %RBX |
0x42c9bd AND $-0x40,%RSP |
0x42c9c1 SUB $0x100,%RSP |
0x42c9c8 MOV 0x34(%RDI),%EAX |
0x42c9cb MOV 0x28(%RDI),%ESI |
0x42c9ce MOV 0x30(%RDI),%R12D |
0x42c9d2 MOV 0x2c(%RDI),%EDX |
0x42c9d5 ADD $0x4,%EAX |
0x42c9d8 LEA 0x1(%RSI),%ECX |
0x42c9db MOV %EAX,0x68(%RSP) |
0x42c9df MOV %ECX,0x64(%RSP) |
0x42c9e3 CMP %EAX,%R12D |
0x42c9e6 JGE 42d273 |
0x42c9ec MOV %EAX,%R14D |
0x42c9ef LEA 0x3(%RDX),%R15D |
0x42c9f3 SUB %R12D,%R14D |
0x42c9f6 CMP %R15D,%ECX |
0x42c9f9 JGE 42d273 |
0x42c9ff MOV %RDI,%RBX |
0x42ca02 MOV %R15D,%EDI |
0x42ca05 SUB %ECX,%EDI |
0x42ca07 MOV %EDI,0x6c(%RSP) |
0x42ca0b CALL 4046c0 <omp_get_num_threads@plt> |
0x42ca10 MOV %EAX,%R13D |
0x42ca13 CALL 4045b0 <omp_get_thread_num@plt> |
0x42ca18 XOR %EDX,%EDX |
0x42ca1a MOV %EAX,%R8D |
0x42ca1d MOV 0x6c(%RSP),%EAX |
0x42ca21 IMUL %R14D,%EAX |
0x42ca25 DIV %R13D |
0x42ca28 MOV %EAX,%ECX |
0x42ca2a CMP %EDX,%R8D |
0x42ca2d JB 42d296 |
0x42ca33 IMUL %ECX,%R8D |
0x42ca37 LEA (%R8,%RDX,1),%R10D |
0x42ca3b LEA (%RCX,%R10,1),%R9D |
0x42ca3f MOV %R9D,0x60(%RSP) |
0x42ca44 CMP %R9D,%R10D |
0x42ca47 JAE 42d273 |
0x42ca4d MOV %R10D,%EAX |
0x42ca50 XOR %EDX,%EDX |
0x42ca52 MOV 0x64(%RSP),%R11D |
0x42ca57 MOV (%RBX),%RSI |
0x42ca5a DIVL 0x6c(%RSP) |
0x42ca5e MOV 0x10(%RBX),%R14 |
0x42ca62 MOV 0x8(%RBX),%RDI |
0x42ca66 MOV %R10D,0xfc(%RSP) |
0x42ca6e VMOVSD 0x35e52(%RIP),%XMM3 |
0x42ca76 MOV %RSI,0x40(%RSP) |
0x42ca7b MOV %R14,0x30(%RSP) |
0x42ca80 MOV %RDI,0x28(%RSP) |
0x42ca85 MOV %R15D,%R8D |
0x42ca88 MOV 0x20(%RBX),%R15 |
0x42ca8c MOV 0x18(%RBX),%RBX |
0x42ca90 VBROADCASTSD %XMM3,%YMM4 |
0x42ca95 VBROADCASTSD %XMM3,%ZMM2 |
0x42ca9b MOV %R15,0x38(%RSP) |
0x42caa0 MOV %RBX,0x20(%RSP) |
0x42caa5 ADD %R12D,%EAX |
0x42caa8 ADD %EDX,%R11D |
0x42caab MOV %EAX,0xac(%RSP) |
0x42cab2 CLTQ |
0x42cab4 SUB %R11D,%R8D |
0x42cab7 MOV %R11D,0xf8(%RSP) |
0x42cabf MOV %RAX,0x88(%RSP) |
0x42cac7 NOPW (%RAX,%RAX,1) |
(143) 0x42cad0 CMP %R8D,%ECX |
(143) 0x42cad3 MOV 0xfc(%RSP),%R12D |
(143) 0x42cadb CMOVBE %ECX,%R8D |
(143) 0x42cadf LEA (%R12,%R8,1),%ECX |
(143) 0x42cae3 MOV %ECX,0xa8(%RSP) |
(143) 0x42caea CMP %ECX,%R12D |
(143) 0x42caed JAE 42d22d |
(143) 0x42caf3 MOV 0x40(%RSP),%R10 |
(143) 0x42caf8 MOV 0x30(%RSP),%R14 |
(143) 0x42cafd MOV 0xac(%RSP),%EAX |
(143) 0x42cb04 MOV 0x38(%RSP),%R9 |
(143) 0x42cb09 MOV (%R10),%RCX |
(143) 0x42cb0c MOV 0x10(%R14),%RDX |
(143) 0x42cb10 MOV 0x10(%R10),%R13 |
(143) 0x42cb14 MOV 0x28(%RSP),%R10 |
(143) 0x42cb19 DEC %EAX |
(143) 0x42cb1b CLTQ |
(143) 0x42cb1d MOV (%R9),%R11 |
(143) 0x42cb20 MOV 0x10(%R9),%R15 |
(143) 0x42cb24 MOV %RDX,0xd8(%RSP) |
(143) 0x42cb2c MOV (%R10),%RDX |
(143) 0x42cb2f MOV %RAX,%RSI |
(143) 0x42cb32 MOV %RAX,%R9 |
(143) 0x42cb35 MOV 0x88(%RSP),%R12 |
(143) 0x42cb3d MOV (%R14),%RBX |
(143) 0x42cb40 IMUL %RCX,%RSI |
(143) 0x42cb44 MOV 0x10(%R10),%R14 |
(143) 0x42cb48 IMUL %RDX,%RAX |
(143) 0x42cb4c IMUL %R12,%RBX |
(143) 0x42cb50 MOV %R14,0xb8(%RSP) |
(143) 0x42cb58 IMUL %R11,%R9 |
(143) 0x42cb5c MOV %RSI,0x90(%RSP) |
(143) 0x42cb64 MOV %RAX,0xe0(%RSP) |
(143) 0x42cb6c MOV 0x20(%RSP),%RAX |
(143) 0x42cb71 IMUL %R12,%RCX |
(143) 0x42cb75 IMUL %R12,%R11 |
(143) 0x42cb79 MOV %RBX,0xb0(%RSP) |
(143) 0x42cb81 LEA -0x1(%R8),%EBX |
(143) 0x42cb85 IMUL %R12,%RDX |
(143) 0x42cb89 MOV 0x10(%RAX),%RDI |
(143) 0x42cb8d MOV %R9,0xc0(%RSP) |
(143) 0x42cb95 IMUL (%RAX),%R12 |
(143) 0x42cb99 MOV %RCX,0xc8(%RSP) |
(143) 0x42cba1 MOV %R11,0xd0(%RSP) |
(143) 0x42cba9 MOV %RDX,0xf0(%RSP) |
(143) 0x42cbb1 MOV %RDI,0xe8(%RSP) |
(143) 0x42cbb9 MOV %R12,0xa0(%RSP) |
(143) 0x42cbc1 CMP $0x6,%EBX |
(143) 0x42cbc4 JBE 42d288 |
(143) 0x42cbca MOVSXD 0xf8(%RSP),%RAX |
(143) 0x42cbd2 LEA (%R11,%RAX,1),%RDI |
(143) 0x42cbd6 LEA (%RSI,%RAX,1),%R10 |
(143) 0x42cbda ADD %RAX,%RCX |
(143) 0x42cbdd SAL $0x3,%RDI |
(143) 0x42cbe1 SAL $0x3,%R10 |
(143) 0x42cbe5 LEA (%R9,%RAX,1),%RSI |
(143) 0x42cbe9 SAL $0x3,%RCX |
(143) 0x42cbed LEA (%R15,%RDI,1),%R11 |
(143) 0x42cbf1 SAL $0x3,%RSI |
(143) 0x42cbf5 LEA -0x8(%R13,%R10,1),%RDX |
(143) 0x42cbfa MOV %R11,0x78(%RSP) |
(143) 0x42cbff LEA (%R13,%RCX,1),%R14 |
(143) 0x42cc04 LEA -0x8(%R13,%RCX,1),%R11 |
(143) 0x42cc09 MOV 0xb0(%RSP),%RCX |
(143) 0x42cc11 LEA (%R15,%RSI,1),%R9 |
(143) 0x42cc15 MOV %RDX,0x80(%RSP) |
(143) 0x42cc1d LEA (%R13,%R10,1),%RBX |
(143) 0x42cc22 MOV 0xe0(%RSP),%RDX |
(143) 0x42cc2a LEA -0x8(%R15,%RDI,1),%R10 |
(143) 0x42cc2f MOV 0xd8(%RSP),%RDI |
(143) 0x42cc37 MOV %R9,0x70(%RSP) |
(143) 0x42cc3c LEA -0x8(%R15,%RSI,1),%R9 |
(143) 0x42cc41 LEA (%RCX,%RAX,1),%RSI |
(143) 0x42cc45 MOV 0xb8(%RSP),%RCX |
(143) 0x42cc4d LEA (%RDI,%RSI,8),%RDI |
(143) 0x42cc51 LEA (%RDX,%RAX,1),%RSI |
(143) 0x42cc55 MOV 0xf0(%RSP),%RDX |
(143) 0x42cc5d LEA (%RCX,%RSI,8),%RCX |
(143) 0x42cc61 MOV 0xb8(%RSP),%RSI |
(143) 0x42cc69 LEA (%RDX,%RAX,1),%RDX |
(143) 0x42cc6d ADD %R12,%RAX |
(143) 0x42cc70 LEA (%RSI,%RDX,8),%RSI |
(143) 0x42cc74 MOV 0xe8(%RSP),%RDX |
(143) 0x42cc7c LEA (%RDX,%RAX,8),%RDX |
(143) 0x42cc80 MOV %R8D,%EAX |
(143) 0x42cc83 SHR $0x3,%EAX |
(143) 0x42cc86 MOV %RAX,%R12 |
(143) 0x42cc89 SAL $0x6,%RAX |
(143) 0x42cc8d MOV %RAX,0x98(%RSP) |
(143) 0x42cc95 XOR %EAX,%EAX |
(143) 0x42cc97 AND $0x1,%R12D |
(143) 0x42cc9b JE 42cd24 |
(143) 0x42cca1 MOV 0x78(%RSP),%RAX |
(143) 0x42cca6 VMOVUPD (%R10),%ZMM5 |
(143) 0x42ccac MOV 0x70(%RSP),%R12 |
(143) 0x42ccb1 VMOVUPD (%R9),%ZMM8 |
(143) 0x42ccb7 VMOVUPD (%RAX),%ZMM6 |
(143) 0x42ccbd VMULPD (%R11),%ZMM5,%ZMM1 |
(143) 0x42ccc3 VMOVUPD (%R12),%ZMM7 |
(143) 0x42ccca MOV 0x80(%RSP),%RAX |
(143) 0x42ccd2 VMULPD (%R14),%ZMM6,%ZMM0 |
(143) 0x42ccd8 MOV 0x98(%RSP),%R12 |
(143) 0x42cce0 VFMADD231PD (%RAX),%ZMM8,%ZMM1 |
(143) 0x42cce6 MOV $0x40,%EAX |
(143) 0x42cceb VFMADD231PD (%RBX),%ZMM7,%ZMM0 |
(143) 0x42ccf1 VADDPD %ZMM1,%ZMM0,%ZMM9 |
(143) 0x42ccf7 VMULPD %ZMM2,%ZMM9,%ZMM10 |
(143) 0x42ccfd VMOVUPD %ZMM10,(%RDI) |
(143) 0x42cd03 VMOVUPD (%RSI),%ZMM11 |
(143) 0x42cd09 VSUBPD (%RCX),%ZMM11,%ZMM12 |
(143) 0x42cd0f VADDPD %ZMM10,%ZMM12,%ZMM13 |
(143) 0x42cd15 VMOVUPD %ZMM13,(%RDX) |
(143) 0x42cd1b CMP %R12,%RAX |
(143) 0x42cd1e JE 42ce42 |
(143) 0x42cd24 MOV %R15,0x50(%RSP) |
(143) 0x42cd29 MOV 0x70(%RSP),%R12 |
(143) 0x42cd2e MOV %R8D,0x5c(%RSP) |
(143) 0x42cd33 MOV 0x80(%RSP),%R8 |
(143) 0x42cd3b MOV %R13,0x48(%RSP) |
(143) 0x42cd40 MOV 0x78(%RSP),%R13 |
(144) 0x42cd45 VMOVUPD (%R13,%RAX,1),%ZMM14 |
(144) 0x42cd4d VMOVUPD (%R10,%RAX,1),%ZMM0 |
(144) 0x42cd54 VMOVUPD (%R12,%RAX,1),%ZMM6 |
(144) 0x42cd5b VMOVUPD (%R9,%RAX,1),%ZMM5 |
(144) 0x42cd62 VMULPD (%R14,%RAX,1),%ZMM14,%ZMM15 |
(144) 0x42cd69 MOV 0x98(%RSP),%R15 |
(144) 0x42cd71 VMULPD (%R11,%RAX,1),%ZMM0,%ZMM7 |
(144) 0x42cd78 VFMADD231PD (%RBX,%RAX,1),%ZMM6,%ZMM15 |
(144) 0x42cd7f VFMADD231PD (%R8,%RAX,1),%ZMM5,%ZMM7 |
(144) 0x42cd86 VADDPD %ZMM7,%ZMM15,%ZMM1 |
(144) 0x42cd8c VMULPD %ZMM2,%ZMM1,%ZMM8 |
(144) 0x42cd92 VMOVUPD %ZMM8,(%RDI,%RAX,1) |
(144) 0x42cd99 VMOVUPD (%RSI,%RAX,1),%ZMM9 |
(144) 0x42cda0 VSUBPD (%RCX,%RAX,1),%ZMM9,%ZMM10 |
(144) 0x42cda7 VADDPD %ZMM8,%ZMM10,%ZMM11 |
(144) 0x42cdad VMOVUPD %ZMM11,(%RDX,%RAX,1) |
(144) 0x42cdb4 VMOVUPD 0x40(%R13,%RAX,1),%ZMM12 |
(144) 0x42cdbc VMOVUPD 0x40(%R10,%RAX,1),%ZMM15 |
(144) 0x42cdc4 VMOVUPD 0x40(%R12,%RAX,1),%ZMM14 |
(144) 0x42cdcc VMOVUPD 0x40(%R9,%RAX,1),%ZMM0 |
(144) 0x42cdd4 VMULPD 0x40(%R14,%RAX,1),%ZMM12,%ZMM13 |
(144) 0x42cddc VMULPD 0x40(%R11,%RAX,1),%ZMM15,%ZMM6 |
(144) 0x42cde4 VFMADD231PD 0x40(%RBX,%RAX,1),%ZMM14,%ZMM13 |
(144) 0x42cdec VFMADD231PD 0x40(%R8,%RAX,1),%ZMM0,%ZMM6 |
(144) 0x42cdf4 VADDPD %ZMM6,%ZMM13,%ZMM7 |
(144) 0x42cdfa VMULPD %ZMM2,%ZMM7,%ZMM8 |
(144) 0x42ce00 VMOVUPD %ZMM8,0x40(%RDI,%RAX,1) |
(144) 0x42ce08 VMOVUPD 0x40(%RSI,%RAX,1),%ZMM5 |
(144) 0x42ce10 VSUBPD 0x40(%RCX,%RAX,1),%ZMM5,%ZMM1 |
(144) 0x42ce18 VADDPD %ZMM8,%ZMM1,%ZMM9 |
(144) 0x42ce1e VMOVUPD %ZMM9,0x40(%RDX,%RAX,1) |
(144) 0x42ce26 SUB $-0x80,%RAX |
(144) 0x42ce2a CMP %R15,%RAX |
(144) 0x42ce2d JNE 42cd45 |
(143) 0x42ce33 MOV 0x5c(%RSP),%R8D |
(143) 0x42ce38 MOV 0x50(%RSP),%R15 |
(143) 0x42ce3d MOV 0x48(%RSP),%R13 |
(143) 0x42ce42 MOV 0xf8(%RSP),%EBX |
(143) 0x42ce49 MOV %R8D,%EDX |
(143) 0x42ce4c AND $-0x8,%EDX |
(143) 0x42ce4f ADD %EDX,0xfc(%RSP) |
(143) 0x42ce56 LEA (%RDX,%RBX,1),%ECX |
(143) 0x42ce59 TEST $0x7,%R8B |
(143) 0x42ce5d JE 42d21d |
(143) 0x42ce63 MOV %R8D,%ESI |
(143) 0x42ce66 SUB %EDX,%ESI |
(143) 0x42ce68 LEA -0x1(%RSI),%R14D |
(143) 0x42ce6c CMP $0x2,%R14D |
(143) 0x42ce70 JBE 42cf7b |
(143) 0x42ce76 MOVSXD 0xf8(%RSP),%RAX |
(143) 0x42ce7e MOV 0x90(%RSP),%R9 |
(143) 0x42ce86 MOV 0xc8(%RSP),%R11 |
(143) 0x42ce8e MOV 0xd0(%RSP),%R10 |
(143) 0x42ce96 LEA (%R9,%RAX,1),%R8 |
(143) 0x42ce9a MOV 0xc0(%RSP),%RDI |
(143) 0x42cea2 MOV 0xe0(%RSP),%RBX |
(143) 0x42ceaa LEA (%R11,%RAX,1),%R9 |
(143) 0x42ceae LEA (%R10,%RAX,1),%R10 |
(143) 0x42ceb2 ADD %RDX,%R8 |
(143) 0x42ceb5 MOV 0xb0(%RSP),%R12 |
(143) 0x42cebd ADD %RDX,%R9 |
(143) 0x42cec0 ADD %RDX,%R10 |
(143) 0x42cec3 VMOVUPD (%R13,%R8,8),%YMM12 |
(143) 0x42ceca VMOVUPD -0x8(%R13,%R8,8),%YMM15 |
(143) 0x42ced1 VMOVUPD (%R13,%R9,8),%YMM10 |
(143) 0x42ced8 VMOVUPD -0x8(%R15,%R10,8),%YMM13 |
(143) 0x42cedf ADD %RAX,%RDI |
(143) 0x42cee2 LEA (%RBX,%RAX,1),%R11 |
(143) 0x42cee6 ADD %RDX,%RDI |
(143) 0x42cee9 MOV 0xf0(%RSP),%RBX |
(143) 0x42cef1 MOV 0xa0(%RSP),%R14 |
(143) 0x42cef9 LEA (%R12,%RAX,1),%R12 |
(143) 0x42cefd VMULPD (%R15,%R10,8),%YMM10,%YMM11 |
(143) 0x42cf03 ADD %RDX,%R12 |
(143) 0x42cf06 ADD %RDX,%R11 |
(143) 0x42cf09 MOV 0xb8(%RSP),%R8 |
(143) 0x42cf11 VMULPD -0x8(%R13,%R9,8),%YMM13,%YMM14 |
(143) 0x42cf18 ADD %RAX,%RBX |
(143) 0x42cf1b ADD %R14,%RAX |
(143) 0x42cf1e ADD %RDX,%RBX |
(143) 0x42cf21 ADD %RDX,%RAX |
(143) 0x42cf24 MOV 0xd8(%RSP),%RDX |
(143) 0x42cf2c VFMADD231PD (%R15,%RDI,8),%YMM12,%YMM11 |
(143) 0x42cf32 VFMADD231PD -0x8(%R15,%RDI,8),%YMM15,%YMM14 |
(143) 0x42cf39 MOV 0xe8(%RSP),%RDI |
(143) 0x42cf41 VADDPD %YMM14,%YMM11,%YMM6 |
(143) 0x42cf46 VMULPD %YMM4,%YMM6,%YMM0 |
(143) 0x42cf4a VMOVUPD %YMM0,(%RDX,%R12,8) |
(143) 0x42cf50 VMOVUPD (%R8,%RBX,8),%YMM7 |
(143) 0x42cf56 VSUBPD (%R8,%R11,8),%YMM7,%YMM8 |
(143) 0x42cf5c VADDPD %YMM0,%YMM8,%YMM5 |
(143) 0x42cf60 VMOVUPD %YMM5,(%RDI,%RAX,8) |
(143) 0x42cf65 TEST $0x3,%SIL |
(143) 0x42cf69 JE 42d21d |
(143) 0x42cf6f AND $-0x4,%ESI |
(143) 0x42cf72 ADD %ESI,0xfc(%RSP) |
(143) 0x42cf79 ADD %ESI,%ECX |
(143) 0x42cf7b MOV 0xc0(%RSP),%R10 |
(143) 0x42cf83 MOV 0xd0(%RSP),%RBX |
(143) 0x42cf8b MOVSXD %ECX,%RAX |
(143) 0x42cf8e LEA -0x1(%RCX),%EDX |
(143) 0x42cf91 MOVSXD %EDX,%RDX |
(143) 0x42cf94 MOV 0xc8(%RSP),%R14 |
(143) 0x42cf9c MOV 0x90(%RSP),%R12 |
(143) 0x42cfa4 LEA (%R10,%RAX,1),%R9 |
(143) 0x42cfa8 ADD %RAX,%RBX |
(143) 0x42cfab LEA (%R15,%R9,8),%RDI |
(143) 0x42cfaf LEA (%R15,%RBX,8),%R9 |
(143) 0x42cfb3 MOV 0xd0(%RSP),%RBX |
(143) 0x42cfbb LEA (%R14,%RAX,1),%R11 |
(143) 0x42cfbf VMOVSD (%R9),%XMM1 |
(143) 0x42cfc4 VMOVSD (%RDI),%XMM10 |
(143) 0x42cfc8 LEA (%R12,%RAX,1),%RSI |
(143) 0x42cfcc ADD %RDX,%RBX |
(143) 0x42cfcf LEA (%R13,%R11,8),%R8 |
(143) 0x42cfd4 LEA (%RDX,%R10,1),%R11 |
(143) 0x42cfd8 VMOVSD (%R15,%RBX,8),%XMM11 |
(143) 0x42cfde LEA (%RDX,%R12,1),%R10 |
(143) 0x42cfe2 ADD %R14,%RDX |
(143) 0x42cfe5 VMOVSD (%R15,%R11,8),%XMM13 |
(143) 0x42cfeb VMULSD (%R8),%XMM1,%XMM9 |
(143) 0x42cff0 LEA (%R13,%RSI,8),%RSI |
(143) 0x42cff5 MOV 0xb0(%RSP),%R14 |
(143) 0x42cffd VMULSD (%R13,%RDX,8),%XMM11,%XMM12 |
(143) 0x42d004 MOV 0xd8(%RSP),%R11 |
(143) 0x42d00c LEA (%R14,%RAX,1),%RDX |
(143) 0x42d010 MOV 0xe0(%RSP),%R14 |
(143) 0x42d018 VFMADD231SD (%RSI),%XMM10,%XMM9 |
(143) 0x42d01d VFMADD231SD (%R13,%R10,8),%XMM13,%XMM12 |
(143) 0x42d024 MOV 0xa0(%RSP),%R10 |
(143) 0x42d02c LEA (%R10,%RAX,1),%RBX |
(143) 0x42d030 MOV 0xa8(%RSP),%R10D |
(143) 0x42d038 VADDSD %XMM12,%XMM9,%XMM14 |
(143) 0x42d03d VMULSD %XMM3,%XMM14,%XMM15 |
(143) 0x42d041 VMOVSD %XMM15,(%R11,%RDX,8) |
(143) 0x42d047 MOV 0xf0(%RSP),%RDX |
(143) 0x42d04f MOV 0xfc(%RSP),%R11D |
(143) 0x42d057 ADD %RAX,%RDX |
(143) 0x42d05a ADD %R14,%RAX |
(143) 0x42d05d MOV 0xb8(%RSP),%R14 |
(143) 0x42d065 INC %R11D |
(143) 0x42d068 VMOVSD (%R14,%RDX,8),%XMM6 |
(143) 0x42d06e VSUBSD (%R14,%RAX,8),%XMM6,%XMM0 |
(143) 0x42d074 MOV 0xe8(%RSP),%RAX |
(143) 0x42d07c VADDSD %XMM15,%XMM0,%XMM7 |
(143) 0x42d081 VMOVSD %XMM7,(%RAX,%RBX,8) |
(143) 0x42d086 LEA 0x1(%RCX),%EAX |
(143) 0x42d089 CMP %R10D,%R11D |
(143) 0x42d08c JAE 42d21d |
(143) 0x42d092 MOV 0xc0(%RSP),%R11 |
(143) 0x42d09a CLTQ |
(143) 0x42d09c VMOVSD (%R9),%XMM9 |
(143) 0x42d0a1 ADD $0x2,%ECX |
(143) 0x42d0a4 LEA (%R12,%RAX,1),%RBX |
(143) 0x42d0a8 VMOVSD (%RDI),%XMM11 |
(143) 0x42d0ac MOV 0xb0(%RSP),%R9 |
(143) 0x42d0b4 LEA (%R11,%RAX,1),%R10 |
(143) 0x42d0b8 MOV 0xd0(%RSP),%R11 |
(143) 0x42d0c0 LEA (%R13,%RBX,8),%RDX |
(143) 0x42d0c5 MOV 0xc8(%RSP),%RBX |
(143) 0x42d0cd VMULSD (%R8),%XMM9,%XMM10 |
(143) 0x42d0d2 LEA (%R15,%R10,8),%R10 |
(143) 0x42d0d6 MOV 0xd8(%RSP),%RDI |
(143) 0x42d0de ADD %RAX,%R11 |
(143) 0x42d0e1 ADD %RAX,%RBX |
(143) 0x42d0e4 VMOVSD (%R10),%XMM1 |
(143) 0x42d0e9 MOV 0xf0(%RSP),%R8 |
(143) 0x42d0f1 LEA (%R15,%R11,8),%R11 |
(143) 0x42d0f5 LEA (%R13,%RBX,8),%RBX |
(143) 0x42d0fa VMOVSD (%R11),%XMM8 |
(143) 0x42d0ff VMULSD (%RBX),%XMM8,%XMM5 |
(143) 0x42d103 VFMADD231SD (%RSI),%XMM11,%XMM10 |
(143) 0x42d108 LEA (%R9,%RAX,1),%RSI |
(143) 0x42d10c VFMADD132SD (%RDX),%XMM5,%XMM1 |
(143) 0x42d111 VADDSD %XMM1,%XMM10,%XMM12 |
(143) 0x42d115 VMULSD %XMM3,%XMM12,%XMM13 |
(143) 0x42d119 VMOVSD %XMM13,(%RDI,%RSI,8) |
(143) 0x42d11e LEA (%R8,%RAX,1),%RDI |
(143) 0x42d122 MOV 0xa0(%RSP),%RSI |
(143) 0x42d12a MOV 0xe0(%RSP),%R8 |
(143) 0x42d132 VMOVSD (%R14,%RDI,8),%XMM14 |
(143) 0x42d138 MOV 0xfc(%RSP),%EDI |
(143) 0x42d13f ADD %RAX,%RSI |
(143) 0x42d142 ADD %R8,%RAX |
(143) 0x42d145 VSUBSD (%R14,%RAX,8),%XMM14,%XMM15 |
(143) 0x42d14b MOV 0xe8(%RSP),%RAX |
(143) 0x42d153 ADD $0x2,%EDI |
(143) 0x42d156 VADDSD %XMM13,%XMM15,%XMM6 |
(143) 0x42d15b VMOVSD %XMM6,(%RAX,%RSI,8) |
(143) 0x42d160 MOV 0xa8(%RSP),%ESI |
(143) 0x42d167 CMP %ESI,%EDI |
(143) 0x42d169 JAE 42d21d |
(143) 0x42d16f MOV 0xd0(%RSP),%RDI |
(143) 0x42d177 MOVSXD %ECX,%RCX |
(143) 0x42d17a MOV 0xc8(%RSP),%R8 |
(143) 0x42d182 VMOVSD (%RBX),%XMM5 |
(143) 0x42d186 MOV 0xc0(%RSP),%RAX |
(143) 0x42d18e ADD %RCX,%R12 |
(143) 0x42d191 ADD %RCX,%R9 |
(143) 0x42d194 ADD %RCX,%RDI |
(143) 0x42d197 ADD %RCX,%R8 |
(143) 0x42d19a VMOVSD (%R10),%XMM9 |
(143) 0x42d19f MOV 0xe0(%RSP),%R10 |
(143) 0x42d1a7 VMOVSD (%R15,%RDI,8),%XMM0 |
(143) 0x42d1ad VMULSD (%R11),%XMM5,%XMM1 |
(143) 0x42d1b2 ADD %RCX,%RAX |
(143) 0x42d1b5 VMOVSD (%R15,%RAX,8),%XMM7 |
(143) 0x42d1bb ADD %RCX,%R10 |
(143) 0x42d1be MOV 0xa0(%RSP),%R15 |
(143) 0x42d1c6 VMULSD (%R13,%R8,8),%XMM0,%XMM8 |
(143) 0x42d1cd ADD %RCX,%R15 |
(143) 0x42d1d0 VFMADD231SD (%RDX),%XMM9,%XMM1 |
(143) 0x42d1d5 MOV 0xf0(%RSP),%RDX |
(143) 0x42d1dd VFMADD231SD (%R13,%R12,8),%XMM7,%XMM8 |
(143) 0x42d1e4 MOV 0xd8(%RSP),%R13 |
(143) 0x42d1ec ADD %RCX,%RDX |
(143) 0x42d1ef VADDSD %XMM1,%XMM8,%XMM10 |
(143) 0x42d1f3 VMULSD %XMM3,%XMM10,%XMM11 |
(143) 0x42d1f7 VMOVSD %XMM11,(%R13,%R9,8) |
(143) 0x42d1fe VMOVSD (%R14,%RDX,8),%XMM12 |
(143) 0x42d204 VSUBSD (%R14,%R10,8),%XMM12,%XMM13 |
(143) 0x42d20a MOV 0xe8(%RSP),%R14 |
(143) 0x42d212 VADDSD %XMM11,%XMM13,%XMM14 |
(143) 0x42d217 VMOVSD %XMM14,(%R14,%R15,8) |
(143) 0x42d21d MOV 0xa8(%RSP),%R11D |
(143) 0x42d225 MOV %R11D,0xfc(%RSP) |
(143) 0x42d22d INCL 0xac(%RSP) |
(143) 0x42d234 INCQ 0x88(%RSP) |
(143) 0x42d23c MOV 0xac(%RSP),%ESI |
(143) 0x42d243 CMP %ESI,0x68(%RSP) |
(143) 0x42d247 JLE 42d270 |
(143) 0x42d249 MOV 0x60(%RSP),%ECX |
(143) 0x42d24d MOV 0xfc(%RSP),%EAX |
(143) 0x42d254 MOV 0x64(%RSP),%R12D |
(143) 0x42d259 MOV 0x6c(%RSP),%R8D |
(143) 0x42d25e SUB %EAX,%ECX |
(143) 0x42d260 MOV %R12D,0xf8(%RSP) |
(143) 0x42d268 JMP 42cad0 |
0x42d26d NOPL (%RAX) |
0x42d270 VZEROUPPER |
0x42d273 LEA -0x28(%RBP),%RSP |
0x42d277 POP %RBX |
0x42d278 POP %R12 |
0x42d27a POP %R13 |
0x42d27c POP %R14 |
0x42d27e POP %R15 |
0x42d280 POP %RBP |
0x42d281 RET |
0x42d282 NOPW (%RAX,%RAX,1) |
(143) 0x42d288 MOV 0xf8(%RSP),%ECX |
(143) 0x42d28f XOR %EDX,%EDX |
(143) 0x42d291 JMP 42ce63 |
0x42d296 INC %ECX |
0x42d298 XOR %EDX,%EDX |
0x42d29a JMP 42ca33 |
0x42d29f NOP |
Path / |
Source file and lines | advec_mom.cpp:167-172 |
Module | exec |
nb instructions | 87 |
nb uops | 97 |
loop length | 325 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 1 |
used ymm registers | 1 |
used zmm registers | 1 |
nb stack references | 14 |
micro-operation queue | 16.17 cycles |
front end | 16.17 cycles |
Metric | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 6.30 | 8.00 | 6.67 | 6.67 | 10.50 | 6.20 | 6.30 | 10.50 | 10.50 | 10.50 | 6.20 | 6.67 |
cycles | 6.30 | 11.90 | 6.67 | 6.67 | 10.50 | 6.20 | 6.30 | 10.50 | 10.50 | 10.50 | 6.20 | 6.67 |
Cycles executing div or sqrt instructions | 12.00 |
FE+BE cycles | 15.43-15.52 |
Stall cycles | 0.00 |
Front-end | 16.17 |
Dispatch | 11.90 |
DIV/SQRT | 12.00 |
Overall L1 | 16.17 |
all | 3% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 10% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 3% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 10% |
all | 8% |
load | 8% |
store | 9% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 8% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
all | 9% |
load | 9% |
store | 9% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 10% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x40,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0x100,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x34(%RDI),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x28(%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x30(%RDI),%R12D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x2c(%RDI),%EDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD $0x4,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA 0x1(%RSI),%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %EAX,0x68(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %ECX,0x64(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %EAX,%R12D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 42d273 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.9+0x8c3> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %EAX,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0x3(%RDX),%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB %R12D,%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %R15D,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 42d273 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.9+0x8c3> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RDI,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R15D,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %ECX,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %EDI,0x6c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 4046c0 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %EAX,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 4045b0 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %EAX,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x6c(%RSP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
IMUL %R14D,%EAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
DIV %R13D | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
MOV %EAX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %EDX,%R8D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB 42d296 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.9+0x8e6> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %ECX,%R8D | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%R8,%RDX,1),%R10D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
LEA (%RCX,%R10,1),%R9D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %R9D,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %R9D,%R10D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 42d273 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.9+0x8c3> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R10D,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x64(%RSP),%R11D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RBX),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
DIVL 0x6c(%RSP) | 5 | 0 | 3 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 11-16 | 6 |
MOV 0x10(%RBX),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RBX),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R10D,0xfc(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x35e52(%RIP),%XMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RSI,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R14,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDI,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R15D,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x20(%RBX),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RBX),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VBROADCASTSD %XMM3,%YMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM3,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %R15,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RBX,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD %R12D,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %EDX,%R11D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %EAX,0xac(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CLTQ | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
SUB %R11D,%R8D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R11D,0xf8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RAX,0x88(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
INC %ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 42ca33 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.9+0x83> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | advec_mom.cpp:167-172 |
Module | exec |
nb instructions | 87 |
nb uops | 97 |
loop length | 325 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 1 |
used ymm registers | 1 |
used zmm registers | 1 |
nb stack references | 14 |
micro-operation queue | 16.17 cycles |
front end | 16.17 cycles |
Metric | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 6.30 | 8.00 | 6.67 | 6.67 | 10.50 | 6.20 | 6.30 | 10.50 | 10.50 | 10.50 | 6.20 | 6.67 |
cycles | 6.30 | 11.90 | 6.67 | 6.67 | 10.50 | 6.20 | 6.30 | 10.50 | 10.50 | 10.50 | 6.20 | 6.67 |
Cycles executing div or sqrt instructions | 12.00 |
FE+BE cycles | 15.43-15.52 |
Stall cycles | 0.00 |
Front-end | 16.17 |
Dispatch | 11.90 |
DIV/SQRT | 12.00 |
Overall L1 | 16.17 |
all | 3% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 10% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 3% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 10% |
all | 8% |
load | 8% |
store | 9% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 8% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
all | 9% |
load | 9% |
store | 9% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 10% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x40,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0x100,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x34(%RDI),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x28(%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x30(%RDI),%R12D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x2c(%RDI),%EDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD $0x4,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA 0x1(%RSI),%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %EAX,0x68(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %ECX,0x64(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %EAX,%R12D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 42d273 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.9+0x8c3> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %EAX,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0x3(%RDX),%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB %R12D,%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %R15D,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 42d273 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.9+0x8c3> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RDI,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R15D,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %ECX,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %EDI,0x6c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 4046c0 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %EAX,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 4045b0 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %EAX,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x6c(%RSP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
IMUL %R14D,%EAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
DIV %R13D | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
MOV %EAX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %EDX,%R8D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB 42d296 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.9+0x8e6> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %ECX,%R8D | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%R8,%RDX,1),%R10D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
LEA (%RCX,%R10,1),%R9D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %R9D,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %R9D,%R10D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 42d273 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.9+0x8c3> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R10D,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x64(%RSP),%R11D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RBX),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
DIVL 0x6c(%RSP) | 5 | 0 | 3 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 11-16 | 6 |
MOV 0x10(%RBX),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RBX),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R10D,0xfc(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVSD 0x35e52(%RIP),%XMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RSI,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R14,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDI,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R15D,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x20(%RBX),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RBX),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VBROADCASTSD %XMM3,%YMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VBROADCASTSD %XMM3,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %R15,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RBX,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD %R12D,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %EDX,%R11D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %EAX,0xac(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CLTQ | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
SUB %R11D,%R8D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R11D,0xf8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RAX,0x88(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
INC %ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 42ca33 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.9+0x83> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.9– | 3.08 | 2.3 |
▼Loop 143 - advec_mom.cpp:169-172 - exec– | 0 | 0 |
○Loop 144 - advec_mom.cpp:170-172 - exec | 3.08 | 2.3 |