Function: advec_mom_kernel(int, int, int, int, clover::Buffer2D<double>&, clover::Buffer2D<double>&, ... | Module: exec | Source: advec_mom.cpp:71-75 [...] | Coverage: 2.08% |
---|
Function: advec_mom_kernel(int, int, int, int, clover::Buffer2D<double>&, clover::Buffer2D<double>&, ... | Module: exec | Source: advec_mom.cpp:71-75 [...] | Coverage: 2.08% |
---|
/scratch_na/users/xoserete/qaas_runs/171-320-5323/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/advec_mom.cpp: 71 - 75 |
-------------------------------------------------------------------------------- |
71: #pragma omp parallel for simd collapse(2) |
72: for (int j = (y_min - 2 + 1); j < (y_max + 2 + 2); j++) { |
73: for (int i = (x_min - 2 + 1); i < (x_max + 2 + 2); i++) { |
74: post_vol(i, j) = volume(i, j); |
75: pre_vol(i, j) = post_vol(i, j) + vol_flux_x(i + 1, j + 0) - vol_flux_x(i, j); |
/scratch_na/users/xoserete/qaas_runs/171-320-5323/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/context.h: 69 - 69 |
-------------------------------------------------------------------------------- |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
0x41d360 PUSH %RBP |
0x41d361 MOV %RSP,%RBP |
0x41d364 PUSH %R15 |
0x41d366 PUSH %R14 |
0x41d368 PUSH %R13 |
0x41d36a PUSH %R12 |
0x41d36c PUSH %RBX |
0x41d36d AND $-0x40,%RSP |
0x41d371 ADD $-0x80,%RSP |
0x41d375 MOV 0x28(%RDI),%EAX |
0x41d378 MOV 0x2c(%RDI),%EDX |
0x41d37b MOV 0x20(%RDI),%EBX |
0x41d37e MOV 0x24(%RDI),%ECX |
0x41d381 ADD $0x4,%EDX |
0x41d384 LEA -0x1(%RAX),%R15D |
0x41d388 LEA -0x1(%RBX),%ESI |
0x41d38b MOV %EDX,0x48(%RSP) |
0x41d38f MOV %ESI,0x44(%RSP) |
0x41d393 CMP %EDX,%R15D |
0x41d396 JGE 41d943 |
0x41d39c MOV %EDX,%EBX |
0x41d39e LEA 0x4(%RCX),%R14D |
0x41d3a2 SUB %R15D,%EBX |
0x41d3a5 CMP %R14D,%ESI |
0x41d3a8 JGE 41d943 |
0x41d3ae MOV %RDI,%R13 |
0x41d3b1 MOV %R14D,%EDI |
0x41d3b4 SUB %ESI,%EDI |
0x41d3b6 MOV %EDI,0x4c(%RSP) |
0x41d3ba CALL 4046c0 <omp_get_num_threads@plt> |
0x41d3bf MOV %EAX,%R12D |
0x41d3c2 CALL 4045b0 <omp_get_thread_num@plt> |
0x41d3c7 XOR %EDX,%EDX |
0x41d3c9 MOV %EAX,%R8D |
0x41d3cc MOV 0x4c(%RSP),%EAX |
0x41d3d0 IMUL %EBX,%EAX |
0x41d3d3 DIV %R12D |
0x41d3d6 MOV %EAX,%EDI |
0x41d3d8 CMP %EDX,%R8D |
0x41d3db JB 41d964 |
0x41d3e1 IMUL %EDI,%R8D |
0x41d3e5 LEA (%R8,%RDX,1),%R11D |
0x41d3e9 LEA (%RDI,%R11,1),%R9D |
0x41d3ed MOV %R9D,0x40(%RSP) |
0x41d3f2 CMP %R9D,%R11D |
0x41d3f5 JAE 41d943 |
0x41d3fb MOV %R11D,%EAX |
0x41d3fe XOR %EDX,%EDX |
0x41d400 MOV 0x44(%RSP),%R10D |
0x41d405 MOV 0x18(%R13),%RSI |
0x41d409 DIVL 0x4c(%RSP) |
0x41d40d MOV 0x10(%R13),%RBX |
0x41d411 MOV %RSI,0x30(%RSP) |
0x41d416 MOV %RBX,0x20(%RSP) |
0x41d41b ADD %EDX,%R10D |
0x41d41e ADD %R15D,%EAX |
0x41d421 MOV %R14D,%EDX |
0x41d424 MOV 0x8(%R13),%R15 |
0x41d428 MOV (%R13),%R14 |
0x41d42c MOV %R10D,0x74(%RSP) |
0x41d431 SUB %R10D,%EDX |
0x41d434 MOVSXD %EAX,%R12 |
0x41d437 MOV %R15,0x38(%RSP) |
0x41d43c MOV %R14,0x28(%RSP) |
0x41d441 NOPL (%RAX) |
(120) 0x41d448 CMP %EDX,%EDI |
(120) 0x41d44a CMOVBE %EDI,%EDX |
(120) 0x41d44d LEA (%R11,%RDX,1),%ECX |
(120) 0x41d451 MOV %ECX,0x70(%RSP) |
(120) 0x41d455 CMP %ECX,%R11D |
(120) 0x41d458 JAE 41d916 |
(120) 0x41d45e MOV 0x38(%RSP),%R13 |
(120) 0x41d463 MOV 0x30(%RSP),%RDI |
(120) 0x41d468 MOV 0x20(%RSP),%R10 |
(120) 0x41d46d MOV 0x28(%RSP),%RAX |
(120) 0x41d472 MOV (%R13),%R8 |
(120) 0x41d476 MOV (%RDI),%R9 |
(120) 0x41d479 MOV 0x10(%RDI),%R14 |
(120) 0x41d47d MOV (%R10),%RDI |
(120) 0x41d480 IMUL %R12,%R8 |
(120) 0x41d484 MOV 0x10(%R13),%R15 |
(120) 0x41d488 MOV 0x10(%R10),%RSI |
(120) 0x41d48c IMUL %R12,%R9 |
(120) 0x41d490 MOV (%RAX),%R13 |
(120) 0x41d493 MOV 0x10(%RAX),%RBX |
(120) 0x41d497 LEA -0x1(%RDX),%EAX |
(120) 0x41d49a IMUL %R12,%RDI |
(120) 0x41d49e MOV %RSI,0x78(%RSP) |
(120) 0x41d4a3 MOV %R8,0x58(%RSP) |
(120) 0x41d4a8 IMUL %R12,%R13 |
(120) 0x41d4ac MOV %R9,0x60(%RSP) |
(120) 0x41d4b1 MOV %RDI,0x68(%RSP) |
(120) 0x41d4b6 CMP $0x6,%EAX |
(120) 0x41d4b9 JBE 41d958 |
(120) 0x41d4bf MOVSXD 0x74(%RSP),%RAX |
(120) 0x41d4c4 MOV 0x68(%RSP),%RSI |
(120) 0x41d4c9 LEA (%R9,%RAX,1),%RCX |
(120) 0x41d4cd LEA (%R8,%RAX,1),%R8 |
(120) 0x41d4d1 LEA (%R14,%RCX,8),%R9 |
(120) 0x41d4d5 MOV 0x78(%RSP),%RCX |
(120) 0x41d4da LEA 0x1(%R13,%RAX,1),%RDI |
(120) 0x41d4df ADD %RSI,%RAX |
(120) 0x41d4e2 SAL $0x3,%RDI |
(120) 0x41d4e6 LEA (%R15,%R8,8),%R10 |
(120) 0x41d4ea LEA (%RCX,%RAX,8),%RSI |
(120) 0x41d4ee MOV %EDX,%ECX |
(120) 0x41d4f0 LEA (%RBX,%RDI,1),%R8 |
(120) 0x41d4f4 XOR %EAX,%EAX |
(120) 0x41d4f6 SHR $0x3,%ECX |
(120) 0x41d4f9 LEA -0x8(%RBX,%RDI,1),%RDI |
(120) 0x41d4fe SAL $0x6,%RCX |
(120) 0x41d502 MOV %RCX,0x50(%RSP) |
(120) 0x41d507 SUB $0x40,%RCX |
(120) 0x41d50b SHR $0x6,%RCX |
(120) 0x41d50f INC %RCX |
(120) 0x41d512 AND $0x7,%ECX |
(120) 0x41d515 JE 41d663 |
(120) 0x41d51b CMP $0x1,%RCX |
(120) 0x41d51f JE 41d631 |
(120) 0x41d525 CMP $0x2,%RCX |
(120) 0x41d529 JE 41d60a |
(120) 0x41d52f CMP $0x3,%RCX |
(120) 0x41d533 JE 41d5e3 |
(120) 0x41d539 CMP $0x4,%RCX |
(120) 0x41d53d JE 41d5bc |
(120) 0x41d53f CMP $0x5,%RCX |
(120) 0x41d543 JE 41d595 |
(120) 0x41d545 CMP $0x6,%RCX |
(120) 0x41d549 JE 41d56e |
(120) 0x41d54b VMOVUPD (%R10),%ZMM0 |
(120) 0x41d551 MOV $0x40,%EAX |
(120) 0x41d556 VMOVUPD %ZMM0,(%R9) |
(120) 0x41d55c VADDPD (%R8),%ZMM0,%ZMM1 |
(120) 0x41d562 VSUBPD (%RDI),%ZMM1,%ZMM2 |
(120) 0x41d568 VMOVUPD %ZMM2,(%RSI) |
(120) 0x41d56e VMOVUPD (%R10,%RAX,1),%ZMM3 |
(120) 0x41d575 VMOVUPD %ZMM3,(%R9,%RAX,1) |
(120) 0x41d57c VADDPD (%R8,%RAX,1),%ZMM3,%ZMM4 |
(120) 0x41d583 VSUBPD (%RDI,%RAX,1),%ZMM4,%ZMM5 |
(120) 0x41d58a VMOVUPD %ZMM5,(%RSI,%RAX,1) |
(120) 0x41d591 ADD $0x40,%RAX |
(120) 0x41d595 VMOVUPD (%R10,%RAX,1),%ZMM6 |
(120) 0x41d59c VMOVUPD %ZMM6,(%R9,%RAX,1) |
(120) 0x41d5a3 VADDPD (%R8,%RAX,1),%ZMM6,%ZMM7 |
(120) 0x41d5aa VSUBPD (%RDI,%RAX,1),%ZMM7,%ZMM8 |
(120) 0x41d5b1 VMOVUPD %ZMM8,(%RSI,%RAX,1) |
(120) 0x41d5b8 ADD $0x40,%RAX |
(120) 0x41d5bc VMOVUPD (%R10,%RAX,1),%ZMM9 |
(120) 0x41d5c3 VMOVUPD %ZMM9,(%R9,%RAX,1) |
(120) 0x41d5ca VADDPD (%R8,%RAX,1),%ZMM9,%ZMM10 |
(120) 0x41d5d1 VSUBPD (%RDI,%RAX,1),%ZMM10,%ZMM11 |
(120) 0x41d5d8 VMOVUPD %ZMM11,(%RSI,%RAX,1) |
(120) 0x41d5df ADD $0x40,%RAX |
(120) 0x41d5e3 VMOVUPD (%R10,%RAX,1),%ZMM12 |
(120) 0x41d5ea VMOVUPD %ZMM12,(%R9,%RAX,1) |
(120) 0x41d5f1 VADDPD (%R8,%RAX,1),%ZMM12,%ZMM13 |
(120) 0x41d5f8 VSUBPD (%RDI,%RAX,1),%ZMM13,%ZMM14 |
(120) 0x41d5ff VMOVUPD %ZMM14,(%RSI,%RAX,1) |
(120) 0x41d606 ADD $0x40,%RAX |
(120) 0x41d60a VMOVUPD (%R10,%RAX,1),%ZMM15 |
(120) 0x41d611 VMOVUPD %ZMM15,(%R9,%RAX,1) |
(120) 0x41d618 VADDPD (%R8,%RAX,1),%ZMM15,%ZMM0 |
(120) 0x41d61f VSUBPD (%RDI,%RAX,1),%ZMM0,%ZMM1 |
(120) 0x41d626 VMOVUPD %ZMM1,(%RSI,%RAX,1) |
(120) 0x41d62d ADD $0x40,%RAX |
(120) 0x41d631 VMOVUPD (%R10,%RAX,1),%ZMM2 |
(120) 0x41d638 VMOVUPD %ZMM2,(%R9,%RAX,1) |
(120) 0x41d63f VADDPD (%R8,%RAX,1),%ZMM2,%ZMM3 |
(120) 0x41d646 VSUBPD (%RDI,%RAX,1),%ZMM3,%ZMM4 |
(120) 0x41d64d VMOVUPD %ZMM4,(%RSI,%RAX,1) |
(120) 0x41d654 ADD $0x40,%RAX |
(120) 0x41d658 CMP %RAX,0x50(%RSP) |
(120) 0x41d65d JE 41d7af |
(121) 0x41d663 VMOVUPD (%R10,%RAX,1),%ZMM5 |
(121) 0x41d66a VMOVUPD %ZMM5,(%R9,%RAX,1) |
(121) 0x41d671 VADDPD (%R8,%RAX,1),%ZMM5,%ZMM6 |
(121) 0x41d678 VSUBPD (%RDI,%RAX,1),%ZMM6,%ZMM7 |
(121) 0x41d67f VMOVUPD %ZMM7,(%RSI,%RAX,1) |
(121) 0x41d686 VMOVUPD 0x40(%R10,%RAX,1),%ZMM8 |
(121) 0x41d68e VMOVUPD %ZMM8,0x40(%R9,%RAX,1) |
(121) 0x41d696 VADDPD 0x40(%R8,%RAX,1),%ZMM8,%ZMM9 |
(121) 0x41d69e VSUBPD 0x40(%RDI,%RAX,1),%ZMM9,%ZMM10 |
(121) 0x41d6a6 VMOVUPD %ZMM10,0x40(%RSI,%RAX,1) |
(121) 0x41d6ae VMOVUPD 0x80(%R10,%RAX,1),%ZMM11 |
(121) 0x41d6b6 VMOVUPD %ZMM11,0x80(%R9,%RAX,1) |
(121) 0x41d6be VADDPD 0x80(%R8,%RAX,1),%ZMM11,%ZMM12 |
(121) 0x41d6c6 VSUBPD 0x80(%RDI,%RAX,1),%ZMM12,%ZMM13 |
(121) 0x41d6ce VMOVUPD %ZMM13,0x80(%RSI,%RAX,1) |
(121) 0x41d6d6 VMOVUPD 0xc0(%R10,%RAX,1),%ZMM14 |
(121) 0x41d6de VMOVUPD %ZMM14,0xc0(%R9,%RAX,1) |
(121) 0x41d6e6 VADDPD 0xc0(%R8,%RAX,1),%ZMM14,%ZMM15 |
(121) 0x41d6ee VSUBPD 0xc0(%RDI,%RAX,1),%ZMM15,%ZMM0 |
(121) 0x41d6f6 VMOVUPD %ZMM0,0xc0(%RSI,%RAX,1) |
(121) 0x41d6fe VMOVUPD 0x100(%R10,%RAX,1),%ZMM1 |
(121) 0x41d706 VMOVUPD %ZMM1,0x100(%R9,%RAX,1) |
(121) 0x41d70e VADDPD 0x100(%R8,%RAX,1),%ZMM1,%ZMM2 |
(121) 0x41d716 VSUBPD 0x100(%RDI,%RAX,1),%ZMM2,%ZMM3 |
(121) 0x41d71e VMOVUPD %ZMM3,0x100(%RSI,%RAX,1) |
(121) 0x41d726 VMOVUPD 0x140(%R10,%RAX,1),%ZMM4 |
(121) 0x41d72e VMOVUPD %ZMM4,0x140(%R9,%RAX,1) |
(121) 0x41d736 VADDPD 0x140(%R8,%RAX,1),%ZMM4,%ZMM5 |
(121) 0x41d73e VSUBPD 0x140(%RDI,%RAX,1),%ZMM5,%ZMM6 |
(121) 0x41d746 VMOVUPD %ZMM6,0x140(%RSI,%RAX,1) |
(121) 0x41d74e VMOVUPD 0x180(%R10,%RAX,1),%ZMM7 |
(121) 0x41d756 VMOVUPD %ZMM7,0x180(%R9,%RAX,1) |
(121) 0x41d75e VADDPD 0x180(%R8,%RAX,1),%ZMM7,%ZMM8 |
(121) 0x41d766 VSUBPD 0x180(%RDI,%RAX,1),%ZMM8,%ZMM9 |
(121) 0x41d76e VMOVUPD %ZMM9,0x180(%RSI,%RAX,1) |
(121) 0x41d776 VMOVUPD 0x1c0(%R10,%RAX,1),%ZMM10 |
(121) 0x41d77e VMOVUPD %ZMM10,0x1c0(%R9,%RAX,1) |
(121) 0x41d786 VADDPD 0x1c0(%R8,%RAX,1),%ZMM10,%ZMM11 |
(121) 0x41d78e VSUBPD 0x1c0(%RDI,%RAX,1),%ZMM11,%ZMM12 |
(121) 0x41d796 VMOVUPD %ZMM12,0x1c0(%RSI,%RAX,1) |
(121) 0x41d79e ADD $0x200,%RAX |
(121) 0x41d7a4 CMP %RAX,0x50(%RSP) |
(121) 0x41d7a9 JNE 41d663 |
(120) 0x41d7af MOV 0x74(%RSP),%R10D |
(120) 0x41d7b4 MOV %EDX,%R9D |
(120) 0x41d7b7 AND $-0x8,%R9D |
(120) 0x41d7bb ADD %R9D,%R11D |
(120) 0x41d7be LEA (%R9,%R10,1),%ESI |
(120) 0x41d7c2 TEST $0x7,%DL |
(120) 0x41d7c5 JE 41d911 |
(120) 0x41d7cb SUB %R9D,%EDX |
(120) 0x41d7ce LEA -0x1(%RDX),%R8D |
(120) 0x41d7d2 CMP $0x2,%R8D |
(120) 0x41d7d6 JBE 41d840 |
(120) 0x41d7d8 MOVSXD 0x74(%RSP),%RAX |
(120) 0x41d7dd MOV 0x58(%RSP),%R10 |
(120) 0x41d7e2 MOV 0x60(%RSP),%R8 |
(120) 0x41d7e7 ADD %RAX,%R10 |
(120) 0x41d7ea LEA (%R13,%RAX,1),%RDI |
(120) 0x41d7ef ADD %R9,%R10 |
(120) 0x41d7f2 ADD %RAX,%R8 |
(120) 0x41d7f5 LEA 0x1(%R9,%RDI,1),%RCX |
(120) 0x41d7fa MOV 0x68(%RSP),%RDI |
(120) 0x41d7ff VMOVUPD (%R15,%R10,8),%YMM13 |
(120) 0x41d805 ADD %R9,%R8 |
(120) 0x41d808 ADD %RDI,%RAX |
(120) 0x41d80b VMOVUPD %YMM13,(%R14,%R8,8) |
(120) 0x41d811 ADD %R9,%RAX |
(120) 0x41d814 MOV 0x78(%RSP),%R9 |
(120) 0x41d819 VMOVUPD (%RBX,%RCX,8),%YMM14 |
(120) 0x41d81e VSUBPD -0x8(%RBX,%RCX,8),%YMM14,%YMM15 |
(120) 0x41d824 VADDPD %YMM13,%YMM15,%YMM0 |
(120) 0x41d829 VMOVUPD %YMM0,(%R9,%RAX,8) |
(120) 0x41d82f TEST $0x3,%DL |
(120) 0x41d832 JE 41d911 |
(120) 0x41d838 AND $-0x4,%EDX |
(120) 0x41d83b ADD %EDX,%R11D |
(120) 0x41d83e ADD %EDX,%ESI |
(120) 0x41d840 MOV 0x58(%RSP),%R9 |
(120) 0x41d845 MOVSXD %ESI,%RDX |
(120) 0x41d848 MOV 0x60(%RSP),%R10 |
(120) 0x41d84d LEA (%R9,%RDX,1),%RAX |
(120) 0x41d851 LEA (%R10,%RDX,1),%RCX |
(120) 0x41d855 VMOVSD (%R15,%RAX,8),%XMM1 |
(120) 0x41d85b LEA 0x1(%RSI),%EAX |
(120) 0x41d85e CLTQ |
(120) 0x41d860 LEA (%R13,%RAX,1),%R8 |
(120) 0x41d865 VMOVSD %XMM1,(%R14,%RCX,8) |
(120) 0x41d86b LEA (%RBX,%R8,8),%RCX |
(120) 0x41d86f MOV 0x68(%RSP),%R8 |
(120) 0x41d874 VMOVSD (%RCX),%XMM2 |
(120) 0x41d878 LEA (%R8,%RDX,1),%RDI |
(120) 0x41d87c ADD %R13,%RDX |
(120) 0x41d87f VSUBSD (%RBX,%RDX,8),%XMM2,%XMM3 |
(120) 0x41d884 MOV 0x78(%RSP),%RDX |
(120) 0x41d889 VADDSD %XMM1,%XMM3,%XMM4 |
(120) 0x41d88d VMOVSD %XMM4,(%RDX,%RDI,8) |
(120) 0x41d892 MOV 0x70(%RSP),%EDI |
(120) 0x41d896 LEA 0x1(%R11),%EDX |
(120) 0x41d89a CMP %EDI,%EDX |
(120) 0x41d89c JAE 41d911 |
(120) 0x41d89e LEA (%RAX,%R9,1),%RDX |
(120) 0x41d8a2 LEA (%RAX,%R10,1),%RDI |
(120) 0x41d8a6 ADD %R8,%RAX |
(120) 0x41d8a9 ADD $0x2,%R11D |
(120) 0x41d8ad VMOVSD (%R15,%RDX,8),%XMM5 |
(120) 0x41d8b3 LEA 0x2(%RSI),%EDX |
(120) 0x41d8b6 MOVSXD %EDX,%RDX |
(120) 0x41d8b9 VMOVSD %XMM5,(%R14,%RDI,8) |
(120) 0x41d8bf LEA (%R13,%RDX,1),%RDI |
(120) 0x41d8c4 LEA (%RBX,%RDI,8),%RDI |
(120) 0x41d8c8 VADDSD (%RDI),%XMM5,%XMM6 |
(120) 0x41d8cc VSUBSD (%RCX),%XMM6,%XMM7 |
(120) 0x41d8d0 MOV 0x78(%RSP),%RCX |
(120) 0x41d8d5 VMOVSD %XMM7,(%RCX,%RAX,8) |
(120) 0x41d8da MOV 0x70(%RSP),%EAX |
(120) 0x41d8de CMP %EAX,%R11D |
(120) 0x41d8e1 JAE 41d911 |
(120) 0x41d8e3 ADD %RDX,%R9 |
(120) 0x41d8e6 ADD $0x3,%ESI |
(120) 0x41d8e9 ADD %RDX,%R10 |
(120) 0x41d8ec ADD %RDX,%R8 |
(120) 0x41d8ef VMOVSD (%R15,%R9,8),%XMM8 |
(120) 0x41d8f5 MOVSXD %ESI,%R11 |
(120) 0x41d8f8 ADD %R13,%R11 |
(120) 0x41d8fb VMOVSD %XMM8,(%R14,%R10,8) |
(120) 0x41d901 VADDSD (%RBX,%R11,8),%XMM8,%XMM9 |
(120) 0x41d907 VSUBSD (%RDI),%XMM9,%XMM10 |
(120) 0x41d90b VMOVSD %XMM10,(%RCX,%R8,8) |
(120) 0x41d911 MOV 0x70(%RSP),%R11D |
(120) 0x41d916 INC %R12 |
(120) 0x41d919 LEA (%R12),%R15D |
(120) 0x41d91d CMP %R15D,0x48(%RSP) |
(120) 0x41d922 JLE 41d940 |
(120) 0x41d924 MOV 0x40(%RSP),%EDI |
(120) 0x41d928 MOV 0x44(%RSP),%R14D |
(120) 0x41d92d MOV 0x4c(%RSP),%EDX |
(120) 0x41d931 MOV %R14D,0x74(%RSP) |
(120) 0x41d936 SUB %R11D,%EDI |
(120) 0x41d939 JMP 41d448 |
0x41d93e XCHG %AX,%AX |
0x41d940 VZEROUPPER |
0x41d943 LEA -0x28(%RBP),%RSP |
0x41d947 POP %RBX |
0x41d948 POP %R12 |
0x41d94a POP %R13 |
0x41d94c POP %R14 |
0x41d94e POP %R15 |
0x41d950 POP %RBP |
0x41d951 RET |
0x41d952 NOPW (%RAX,%RAX,1) |
(120) 0x41d958 MOV 0x74(%RSP),%ESI |
(120) 0x41d95c XOR %R9D,%R9D |
(120) 0x41d95f JMP 41d7cb |
0x41d964 INC %EDI |
0x41d966 XOR %EDX,%EDX |
0x41d968 JMP 41d3e1 |
0x41d96d NOPL (%RAX) |
Path / |
Source file and lines | advec_mom.cpp:71-75 |
Module | exec |
nb instructions | 80 |
nb uops | 90 |
loop length | 270 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 10 |
micro-operation queue | 15.00 cycles |
front end | 15.00 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 5.70 | 8.00 | 6.00 | 6.00 | 8.50 | 5.87 | 5.70 | 8.50 | 8.50 | 8.50 | 5.73 | 6.00 |
cycles | 5.70 | 11.73 | 6.00 | 6.00 | 8.50 | 5.87 | 5.70 | 8.50 | 8.50 | 8.50 | 5.73 | 6.00 |
Cycles executing div or sqrt instructions | 12.00 |
FE+BE cycles | 14.24-14.38 |
Stall cycles | 0.00 |
Front-end | 15.00 |
Dispatch | 11.73 |
DIV/SQRT | 12.00 |
Overall L1 | 15.00 |
all | 4% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 14% |
all | 9% |
load | 10% |
store | 9% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 9% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x40,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
ADD $-0x80,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x28(%RDI),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x2c(%RDI),%EDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%RDI),%EBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x24(%RDI),%ECX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD $0x4,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA -0x1(%RAX),%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
LEA -0x1(%RBX),%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %EDX,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %ESI,0x44(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %EDX,%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 41d943 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.3+0x5e3> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %EDX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0x4(%RCX),%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB %R15D,%EBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %R14D,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 41d943 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.3+0x5e3> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RDI,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R14D,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %ESI,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %EDI,0x4c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 4046c0 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %EAX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 4045b0 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %EAX,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x4c(%RSP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
IMUL %EBX,%EAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
DIV %R12D | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
MOV %EAX,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %EDX,%R8D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB 41d964 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.3+0x604> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %EDI,%R8D | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%R8,%RDX,1),%R11D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
LEA (%RDI,%R11,1),%R9D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %R9D,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %R9D,%R11D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 41d943 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.3+0x5e3> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R11D,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x44(%RSP),%R10D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%R13),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
DIVL 0x4c(%RSP) | 5 | 0 | 3 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 11-16 | 6 |
MOV 0x10(%R13),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RSI,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RBX,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD %EDX,%R10D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %R15D,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R14D,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x8(%R13),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R13),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R10D,0x74(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB %R10D,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOVSXD %EAX,%R12 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV %R15,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R14,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
INC %EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 41d3e1 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.3+0x81> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | advec_mom.cpp:71-75 |
Module | exec |
nb instructions | 80 |
nb uops | 90 |
loop length | 270 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 10 |
micro-operation queue | 15.00 cycles |
front end | 15.00 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 5.70 | 8.00 | 6.00 | 6.00 | 8.50 | 5.87 | 5.70 | 8.50 | 8.50 | 8.50 | 5.73 | 6.00 |
cycles | 5.70 | 11.73 | 6.00 | 6.00 | 8.50 | 5.87 | 5.70 | 8.50 | 8.50 | 8.50 | 5.73 | 6.00 |
Cycles executing div or sqrt instructions | 12.00 |
FE+BE cycles | 14.24-14.38 |
Stall cycles | 0.00 |
Front-end | 15.00 |
Dispatch | 11.73 |
DIV/SQRT | 12.00 |
Overall L1 | 15.00 |
all | 4% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 14% |
all | 9% |
load | 10% |
store | 9% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 6% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 9% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x40,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
ADD $-0x80,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x28(%RDI),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x2c(%RDI),%EDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%RDI),%EBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x24(%RDI),%ECX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD $0x4,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA -0x1(%RAX),%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
LEA -0x1(%RBX),%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %EDX,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %ESI,0x44(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %EDX,%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 41d943 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.3+0x5e3> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %EDX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0x4(%RCX),%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB %R15D,%EBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %R14D,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 41d943 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.3+0x5e3> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RDI,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R14D,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %ESI,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %EDI,0x4c(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 4046c0 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %EAX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 4045b0 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %EAX,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x4c(%RSP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
IMUL %EBX,%EAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
DIV %R12D | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
MOV %EAX,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %EDX,%R8D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB 41d964 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.3+0x604> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %EDI,%R8D | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%R8,%RDX,1),%R11D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
LEA (%RDI,%R11,1),%R9D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %R9D,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %R9D,%R11D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 41d943 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.3+0x5e3> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R11D,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x44(%RSP),%R10D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%R13),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
DIVL 0x4c(%RSP) | 5 | 0 | 3 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 11-16 | 6 |
MOV 0x10(%R13),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RSI,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RBX,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD %EDX,%R10D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %R15D,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R14D,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x8(%R13),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R13),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R10D,0x74(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB %R10D,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOVSXD %EAX,%R12 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV %R15,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R14,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
INC %EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 41d3e1 <_Z16advec_mom_kerneliiiiRN6clover8Buffer2DIdEES2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_S2_RNS_8Buffer1DIdEES5_iii._omp_fn.3+0x81> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼advec_mom_kernel(int, int, int, int, clover::Buffer2D | 2.08 | 0.66 |
▼Loop 120 - advec_mom.cpp:74-75 - exec– | 0.01 | 0 |
○Loop 121 - advec_mom.cpp:74-75 - exec | 2.07 | 0.66 |