Function: _Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_._omp_fn.0.l ... | Module: exec | Source: viscosity.cpp:36-64 [...] | Coverage: 2.71% |
---|
Function: _Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_._omp_fn.0.l ... | Module: exec | Source: viscosity.cpp:36-64 [...] | Coverage: 2.71% |
---|
/home/eoseret/qaas_runs_CPU_9468/171-145-9336/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/viscosity.cpp: 36 - 64 |
-------------------------------------------------------------------------------- |
36: #pragma omp parallel for simd collapse(2) |
37: for (int j = (y_min + 1); j < (y_max + 2); j++) { |
38: for (int i = (x_min + 1); i < (x_max + 2); i++) { |
39: double ugrad = (xvel0(i + 1, j + 0) + xvel0(i + 1, j + 1)) - (xvel0(i, j) + xvel0(i + 0, j + 1)); |
40: double vgrad = (yvel0(i + 0, j + 1) + yvel0(i + 1, j + 1)) - (yvel0(i, j) + yvel0(i + 1, j + 0)); |
41: double div = (celldx[i] * (ugrad) + celldy[j] * (vgrad)); |
42: double strain2 = 0.5 * (xvel0(i + 0, j + 1) + xvel0(i + 1, j + 1) - xvel0(i, j) - xvel0(i + 1, j + 0)) / celldy[j] + |
43: 0.5 * (yvel0(i + 1, j + 0) + yvel0(i + 1, j + 1) - yvel0(i, j) - yvel0(i + 0, j + 1)) / celldx[i]; |
44: double pgradx = (pressure(i + 1, j + 0) - pressure(i - 1, j + 0)) / (celldx[i] + celldx[i + 1]); |
45: double pgrady = (pressure(i + 0, j + 1) - pressure(i + 0, j - 1)) / (celldy[j] + celldy[j + 2]); |
46: double pgradx2 = pgradx * pgradx; |
47: double pgrady2 = pgrady * pgrady; |
48: double limiter = ((0.5 * (ugrad) / celldx[i]) * pgradx2 + (0.5 * (vgrad) / celldy[j]) * pgrady2 + strain2 * pgradx * pgrady) / |
49: std::fmax(pgradx2 + pgrady2, g_small); |
50: if ((limiter > 0.0) || (div >= 0.0)) { |
51: viscosity(i, j) = 0.0; |
52: } else { |
53: double dirx = 1.0; |
54: if (pgradx < 0.0) dirx = -1.0; |
55: pgradx = dirx * std::fmax(g_small, std::fabs(pgradx)); |
56: double diry = 1.0; |
57: if (pgradx < 0.0) diry = -1.0; |
58: pgrady = diry * std::fmax(g_small, std::fabs(pgrady)); |
59: double pgrad = std::sqrt(pgradx * pgradx + pgrady * pgrady); |
60: double xgrad = std::fabs(celldx[i] * pgrad / pgradx); |
61: double ygrad = std::fabs(celldy[j] * pgrad / pgrady); |
62: double grad = std::fmin(xgrad, ygrad); |
63: double grad2 = grad * grad; |
64: viscosity(i, j) = 2.0 * density0(i, j) * grad2 * limiter * limiter; |
/home/eoseret/qaas_runs_CPU_9468/171-145-9336/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/context.h: 46 - 69 |
-------------------------------------------------------------------------------- |
46: T &operator[](size_t i) const { return data[i]; } |
[...] |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
0x45b430 PUSH %RBP |
0x45b431 MOV %RSP,%RBP |
0x45b434 PUSH %R15 |
0x45b436 PUSH %R14 |
0x45b438 PUSH %R13 |
0x45b43a PUSH %R12 |
0x45b43c PUSH %RBX |
0x45b43d MOV %RDI,%RBX |
0x45b440 SUB $0x88,%RSP |
0x45b447 MOV 0x40(%RDI),%EAX |
0x45b44a MOV 0x44(%RDI),%ECX |
0x45b44d MOV 0x38(%RDI),%EDX |
0x45b450 MOV 0x3c(%RBX),%ESI |
0x45b453 ADD $0x2,%ECX |
0x45b456 LEA 0x1(%RAX),%R15D |
0x45b45a LEA 0x1(%RDX),%EDI |
0x45b45d MOV %ECX,-0x80(%RBP) |
0x45b460 MOV %EDI,-0x84(%RBP) |
0x45b466 CMP %ECX,%R15D |
0x45b469 JGE 45b8b0 |
0x45b46f MOV %ECX,%R13D |
0x45b472 LEA 0x2(%RSI),%R14D |
0x45b476 SUB %R15D,%R13D |
0x45b479 CMP %R14D,%EDI |
0x45b47c JGE 45b8b0 |
0x45b482 MOV %R14D,%R8D |
0x45b485 SUB %EDI,%R8D |
0x45b488 MOV %R8D,-0x7c(%RBP) |
0x45b48c CALL 4046c0 <omp_get_num_threads@plt> |
0x45b491 MOV %EAX,%R12D |
0x45b494 CALL 4045b0 <omp_get_thread_num@plt> |
0x45b499 XOR %EDX,%EDX |
0x45b49b MOV %EAX,%ECX |
0x45b49d MOV -0x7c(%RBP),%EAX |
0x45b4a0 IMUL %R13D,%EAX |
0x45b4a4 DIV %R12D |
0x45b4a7 MOV %EAX,%R12D |
0x45b4aa CMP %EDX,%ECX |
0x45b4ac JB 45b8ed |
0x45b4b2 IMUL %R12D,%ECX |
0x45b4b6 ADD %EDX,%ECX |
0x45b4b8 LEA (%R12,%RCX,1),%R9D |
0x45b4bc MOV %R9D,-0x88(%RBP) |
0x45b4c3 CMP %R9D,%ECX |
0x45b4c6 JAE 45b8b0 |
0x45b4cc MOV %ECX,%EAX |
0x45b4ce XOR %EDX,%EDX |
0x45b4d0 MOV -0x84(%RBP),%R10D |
0x45b4d7 MOV 0x30(%RBX),%RDI |
0x45b4db DIVL -0x7c(%RBP) |
0x45b4de MOV 0x8(%RBX),%R13 |
0x45b4e2 MOV 0x18(%RBX),%R8 |
0x45b4e6 VMOVQ 0x7592(%RIP),%XMM8 |
0x45b4ee VMOVSD 0x80e2(%RIP),%XMM10 |
0x45b4f6 MOV %RDI,-0x98(%RBP) |
0x45b4fd VMOVQ 0x819b(%RIP),%XMM12 |
0x45b505 VMOVSD 0x70ab(%RIP),%XMM11 |
0x45b50d MOV %R13,-0xa8(%RBP) |
0x45b514 MOV %R8,-0xb0(%RBP) |
0x45b51b VXORPD %XMM5,%XMM5,%XMM5 |
0x45b51f LEA (%RDX,%R10,1),%R9D |
0x45b523 MOV %R14D,%EDX |
0x45b526 MOV (%RBX),%R14 |
0x45b529 LEA (%RAX,%R15,1),%R11D |
0x45b52d MOV 0x28(%RBX),%R15 |
0x45b531 MOV %R11D,-0x48(%RBP) |
0x45b535 SUB %R9D,%EDX |
0x45b538 MOV %R11D,%EAX |
0x45b53b MOV %R14,-0xa0(%RBP) |
0x45b542 MOV 0x10(%RBX),%R14 |
0x45b546 MOV 0x20(%RBX),%RBX |
0x45b54a MOV %R15,-0x90(%RBP) |
0x45b551 MOVSXD %R11D,%R15 |
0x45b554 MOV %RBX,-0x68(%RBP) |
0x45b558 NOPL (%RAX,%RAX,1) |
(505) 0x45b560 CMP %EDX,%R12D |
(505) 0x45b563 CMOVBE %R12D,%EDX |
(505) 0x45b567 INC %EAX |
(505) 0x45b569 MOV %EAX,-0x5c(%RBP) |
(505) 0x45b56c LEA (%RCX,%RDX,1),%ESI |
(505) 0x45b56f MOV %ESI,-0x60(%RBP) |
(505) 0x45b572 CMP %ESI,%ECX |
(505) 0x45b574 JAE 45b8e0 |
(505) 0x45b57a MOV -0x98(%RBP),%RAX |
(505) 0x45b581 MOV -0x90(%RBP),%R12 |
(505) 0x45b588 LEA 0x1(%R15),%RCX |
(505) 0x45b58c VMOVDDUP 0x8034(%RIP),%XMM9 |
(505) 0x45b594 MOV -0xa8(%RBP),%R8 |
(505) 0x45b59b MOV -0xa0(%RBP),%R13 |
(505) 0x45b5a2 MOV %RCX,-0x58(%RBP) |
(505) 0x45b5a6 MOV 0x10(%RAX),%R11 |
(505) 0x45b5aa MOV 0x10(%R12),%R10 |
(505) 0x45b5af MOV (%R12),%RDI |
(505) 0x45b5b3 MOV (%RAX),%R12 |
(505) 0x45b5b6 MOV 0x8(%R8),%RAX |
(505) 0x45b5ba MOV -0x48(%RBP),%R8D |
(505) 0x45b5be MOV %R11,-0x70(%RBP) |
(505) 0x45b5c2 MOV -0xb0(%RBP),%R11 |
(505) 0x45b5c9 MOV 0x8(%R13),%RBX |
(505) 0x45b5cd MOV %RDI,-0x78(%RBP) |
(505) 0x45b5d1 IMUL %R15,%RDI |
(505) 0x45b5d5 IMUL %R12,%RCX |
(505) 0x45b5d9 LEA (%RAX,%R15,8),%RSI |
(505) 0x45b5dd VMOVSD 0x7fe3(%RIP),%XMM6 |
(505) 0x45b5e5 MOV 0x10(%R11),%R13 |
(505) 0x45b5e9 MOV %RSI,-0x40(%RBP) |
(505) 0x45b5ed MOV (%R11),%RSI |
(505) 0x45b5f0 LEA 0x2(%R8),%R11D |
(505) 0x45b5f4 MOVSXD %R11D,%R8 |
(505) 0x45b5f7 MOV %R13,-0x50(%RBP) |
(505) 0x45b5fb LEA (%RAX,%R8,8),%R11 |
(505) 0x45b5ff MOVSXD %R9D,%RAX |
(505) 0x45b602 LEA -0x1(%RDX),%R9D |
(505) 0x45b606 MOV %R10,%R8 |
(505) 0x45b609 LEA 0x1(%RAX),%RDX |
(505) 0x45b60d LEA (%R10,%RDI,8),%R10 |
(505) 0x45b611 ADD %RDX,%R9 |
(505) 0x45b614 LEA (,%RSI,8),%R13 |
(505) 0x45b61c MOV %R9,-0x38(%RBP) |
(505) 0x45b620 MOV -0x78(%RBP),%R9 |
(505) 0x45b624 ADD %R9,%RDI |
(505) 0x45b627 LEA (%R8,%RDI,8),%R9 |
(505) 0x45b62b MOV -0x70(%RBP),%RDI |
(505) 0x45b62f LEA (%RDI,%RCX,8),%R8 |
(505) 0x45b633 SUB %R12,%RCX |
(505) 0x45b636 LEA (%RDI,%RCX,8),%RDI |
(505) 0x45b63a MOV %RSI,%RCX |
(505) 0x45b63d MOV -0x50(%RBP),%RSI |
(505) 0x45b641 IMUL %R15,%RCX |
(505) 0x45b645 LEA (%RSI,%RCX,8),%R12 |
(505) 0x45b649 MOV -0x48(%RBP),%ECX |
(505) 0x45b64c MOV %R12,-0x70(%RBP) |
(505) 0x45b650 MOV -0x58(%RBP),%R12 |
(505) 0x45b654 DEC %ECX |
(505) 0x45b656 MOVSXD %ECX,%RCX |
(505) 0x45b659 IMUL %R13,%R12 |
(505) 0x45b65d IMUL %R13,%RCX |
(505) 0x45b661 MOV -0x68(%RBP),%R13 |
(505) 0x45b665 MOV (%R13),%R13 |
(505) 0x45b669 ADD %RSI,%R12 |
(505) 0x45b66c ADD %RSI,%RCX |
(505) 0x45b66f MOV -0x68(%RBP),%RSI |
(505) 0x45b673 IMUL %R15,%R13 |
(505) 0x45b677 MOV 0x10(%RSI),%RSI |
(505) 0x45b67b VMOVSD 0x6f3d(%RIP),%XMM7 |
(505) 0x45b683 MOV %R15,-0x50(%RBP) |
(505) 0x45b687 MOV %RCX,-0x48(%RBP) |
(505) 0x45b68b LEA (%RSI,%R13,8),%R13 |
(505) 0x45b68f MOV -0x70(%RBP),%RSI |
(505) 0x45b693 MOV %R13,%R15 |
(505) 0x45b696 JMP 45b72f |
0x45b69b NOPL (%RAX,%RAX,1) |
(506) 0x45b6a0 VCOMISD %XMM24,%XMM5 |
(506) 0x45b6a6 VANDPD %XMM8,%XMM24,%XMM15 |
(506) 0x45b6ac VMOVSD %XMM10,%XMM10,%XMM2 |
(506) 0x45b6b0 VMAXSD %XMM7,%XMM15,%XMM14 |
(506) 0x45b6b4 JA 45b8c8 |
(506) 0x45b6ba VANDPD %XMM8,%XMM0,%XMM0 |
(506) 0x45b6bf MOV -0x50(%RBP),%R13 |
(506) 0x45b6c3 MOV 0x10(%R14),%RCX |
(506) 0x45b6c7 VMAXSD %XMM7,%XMM0,%XMM15 |
(506) 0x45b6cb IMUL (%R14),%R13 |
(506) 0x45b6cf VMULSD %XMM2,%XMM15,%XMM4 |
(506) 0x45b6d3 ADD %RAX,%R13 |
(506) 0x45b6d6 VMULSD %XMM4,%XMM4,%XMM0 |
(506) 0x45b6da VFMADD231SD %XMM14,%XMM14,%XMM0 |
(506) 0x45b6df VSQRTSD %XMM0,%XMM0,%XMM0 |
(506) 0x45b6e3 VMULSD %XMM3,%XMM0,%XMM3 |
(506) 0x45b6e7 VMULSD %XMM13,%XMM0,%XMM13 |
(506) 0x45b6ec VDIVSD %XMM4,%XMM3,%XMM2 |
(506) 0x45b6f0 VMOVSD (%RCX,%R13,8),%XMM3 |
(506) 0x45b6f6 VDIVSD %XMM14,%XMM13,%XMM14 |
(506) 0x45b6fb VANDPD %XMM8,%XMM2,%XMM15 |
(506) 0x45b700 VANDPD %XMM8,%XMM14,%XMM4 |
(506) 0x45b705 VMINSD %XMM4,%XMM15,%XMM0 |
(506) 0x45b709 VADDSD %XMM3,%XMM3,%XMM15 |
(506) 0x45b70d VMULSD %XMM1,%XMM0,%XMM1 |
(506) 0x45b711 VMULSD %XMM1,%XMM1,%XMM2 |
(506) 0x45b715 VMULSD %XMM2,%XMM15,%XMM13 |
(506) 0x45b719 VMOVSD %XMM13,(%R15,%RAX,8) |
(506) 0x45b71f MOV %RDX,%RAX |
(506) 0x45b722 CMP %RDX,-0x38(%RBP) |
(506) 0x45b726 JE 45b87c |
(506) 0x45b72c INC %RDX |
(506) 0x45b72f VMOVSD 0x8(%R9,%RAX,8),%XMM2 |
(506) 0x45b736 VMOVSD (%R9,%RAX,8),%XMM1 |
(506) 0x45b73c VMOVSD 0x8(%R10,%RAX,8),%XMM18 |
(506) 0x45b744 VMOVSD (%R10,%RAX,8),%XMM14 |
(506) 0x45b74a VADDSD %XMM2,%XMM1,%XMM0 |
(506) 0x45b74e VMOVSD 0x8(%R8,%RAX,8),%XMM13 |
(506) 0x45b755 VMOVSD 0x8(%RDI,%RAX,8),%XMM15 |
(506) 0x45b75b VADDSD %XMM14,%XMM1,%XMM4 |
(506) 0x45b760 VADDSD %XMM18,%XMM2,%XMM3 |
(506) 0x45b766 VMOVSD (%R8,%RAX,8),%XMM17 |
(506) 0x45b76d MOV -0x40(%RBP),%RCX |
(506) 0x45b771 VADDSD %XMM13,%XMM15,%XMM16 |
(506) 0x45b777 VADDSD %XMM18,%XMM14,%XMM14 |
(506) 0x45b77d VMOVSD 0x8(%RSI,%RAX,8),%XMM22 |
(506) 0x45b785 MOV -0x48(%RBP),%R13 |
(506) 0x45b789 VADDSD %XMM17,%XMM13,%XMM1 |
(506) 0x45b78f VMOVSD (%RBX,%RAX,8),%XMM13 |
(506) 0x45b794 VSUBSD %XMM4,%XMM3,%XMM2 |
(506) 0x45b798 VMOVSD (%RDI,%RAX,8),%XMM4 |
(506) 0x45b79d VSUBSD -0x8(%RSI,%RAX,8),%XMM22,%XMM23 |
(506) 0x45b7a5 VSUBSD %XMM14,%XMM0,%XMM0 |
(506) 0x45b7aa VADDSD %XMM4,%XMM15,%XMM3 |
(506) 0x45b7ae VADDSD %XMM17,%XMM4,%XMM4 |
(506) 0x45b7b4 VSUBSD %XMM4,%XMM16,%XMM20 |
(506) 0x45b7ba VSUBSD %XMM3,%XMM1,%XMM1 |
(506) 0x45b7be VMOVSD (%RCX),%XMM3 |
(506) 0x45b7c2 VUNPCKLPD %XMM13,%XMM3,%XMM19 |
(506) 0x45b7c8 VMULSD %XMM1,%XMM3,%XMM15 |
(506) 0x45b7cc VUNPCKLPD %XMM20,%XMM0,%XMM14 |
(506) 0x45b7d2 VMULPD %XMM9,%XMM14,%XMM0 |
(506) 0x45b7d7 VMULSD %XMM6,%XMM1,%XMM1 |
(506) 0x45b7db VFMADD231SD %XMM2,%XMM13,%XMM15 |
(506) 0x45b7e0 VDIVPD %XMM19,%XMM0,%XMM14 |
(506) 0x45b7e6 VADDSD 0x8(%RBX,%RAX,8),%XMM13,%XMM0 |
(506) 0x45b7ec VMULSD %XMM6,%XMM2,%XMM2 |
(506) 0x45b7f0 VCOMISD %XMM5,%XMM15 |
(506) 0x45b7f4 VDIVSD %XMM0,%XMM23,%XMM24 |
(506) 0x45b7fa VUNPCKHPD %XMM14,%XMM14,%XMM4 |
(506) 0x45b7ff VADDPD %XMM14,%XMM4,%XMM4 |
(506) 0x45b804 VMOVSD (%R12,%RAX,8),%XMM14 |
(506) 0x45b80a VSUBSD (%R13,%RAX,8),%XMM14,%XMM0 |
(506) 0x45b811 VMOVSD %XMM4,%XMM4,%XMM21 |
(506) 0x45b817 VADDSD (%R11),%XMM3,%XMM4 |
(506) 0x45b81c VDIVSD %XMM4,%XMM0,%XMM0 |
(506) 0x45b820 VMULSD %XMM24,%XMM24,%XMM25 |
(506) 0x45b826 VDIVSD %XMM13,%XMM2,%XMM2 |
(506) 0x45b82b VMULSD %XMM24,%XMM0,%XMM14 |
(506) 0x45b831 VMULSD %XMM0,%XMM0,%XMM4 |
(506) 0x45b835 VMULSD %XMM21,%XMM14,%XMM14 |
(506) 0x45b83b VDIVSD %XMM3,%XMM1,%XMM1 |
(506) 0x45b83f VFMADD132SD %XMM25,%XMM14,%XMM2 |
(506) 0x45b845 VFMADD132SD %XMM4,%XMM2,%XMM1 |
(506) 0x45b84a VADDSD %XMM25,%XMM4,%XMM4 |
(506) 0x45b850 VMAXSD %XMM7,%XMM4,%XMM2 |
(506) 0x45b854 VDIVSD %XMM2,%XMM1,%XMM1 |
(506) 0x45b858 JAE 45b864 |
(506) 0x45b85a VCOMISD %XMM5,%XMM1 |
(506) 0x45b85e JBE 45b6a0 |
(506) 0x45b864 VXORPD %XMM13,%XMM13,%XMM13 |
(506) 0x45b869 VMOVSD %XMM13,(%R15,%RAX,8) |
(506) 0x45b86f MOV %RDX,%RAX |
(506) 0x45b872 CMP %RDX,-0x38(%RBP) |
(506) 0x45b876 JNE 45b72c |
(505) 0x45b87c MOV -0x5c(%RBP),%EBX |
(505) 0x45b87f MOV -0x58(%RBP),%R15 |
(505) 0x45b883 CMP %EBX,-0x80(%RBP) |
(505) 0x45b886 JLE 45b8b0 |
(505) 0x45b888 MOV -0x88(%RBP),%R12D |
(505) 0x45b88f MOV -0x60(%RBP),%ECX |
(505) 0x45b892 MOV %EBX,-0x48(%RBP) |
(505) 0x45b895 MOV %EBX,%EAX |
(505) 0x45b897 MOV -0x7c(%RBP),%EDX |
(505) 0x45b89a MOV -0x84(%RBP),%R9D |
(505) 0x45b8a1 SUB %ECX,%R12D |
(505) 0x45b8a4 JMP 45b560 |
0x45b8a9 NOPL (%RAX) |
0x45b8b0 ADD $0x88,%RSP |
0x45b8b7 POP %RBX |
0x45b8b8 POP %R12 |
0x45b8ba POP %R13 |
0x45b8bc POP %R14 |
0x45b8be POP %R15 |
0x45b8c0 POP %RBP |
0x45b8c1 RET |
0x45b8c2 NOPW (%RAX,%RAX,1) |
(506) 0x45b8c8 VCOMISD %XMM5,%XMM14 |
(506) 0x45b8cc VXORPD %XMM12,%XMM14,%XMM4 |
(506) 0x45b8d1 VMOVSD %XMM4,%XMM4,%XMM14 |
(506) 0x45b8d5 JBE 45b8f7 |
(506) 0x45b8d7 VMOVSD %XMM11,%XMM11,%XMM2 |
(506) 0x45b8db JMP 45b6ba |
(505) 0x45b8e0 MOV %ECX,-0x60(%RBP) |
(505) 0x45b8e3 LEA 0x1(%R15),%RCX |
(505) 0x45b8e7 MOV %RCX,-0x58(%RBP) |
(505) 0x45b8eb JMP 45b87c |
0x45b8ed INC %R12D |
0x45b8f0 XOR %EDX,%EDX |
0x45b8f2 JMP 45b4b2 |
(506) 0x45b8f7 VMOVSD 0x7cd9(%RIP),%XMM2 |
(506) 0x45b8ff JMP 45b6ba |
0x45b904 NOPW %CS:(%RAX,%RAX,1) |
0x45b90f NOP |
Path / |
Source file and lines | viscosity.cpp:36-64 |
Module | exec |
nb instructions | 91 |
nb uops | 100 |
loop length | 362 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 5 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 11 |
micro-operation queue | 16.67 cycles |
front end | 16.67 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 6.10 | 8.00 | 8.33 | 8.33 | 9.50 | 6.07 | 5.90 | 9.50 | 9.50 | 9.50 | 5.93 | 8.33 |
cycles | 6.10 | 11.93 | 8.33 | 8.33 | 9.50 | 6.07 | 5.90 | 9.50 | 9.50 | 9.50 | 5.93 | 8.33 |
Cycles executing div or sqrt instructions | 12.00 |
FE+BE cycles | 16.09-16.18 |
Stall cycles | 0.00 |
Front-end | 16.67 |
Dispatch | 11.93 |
DIV/SQRT | 12.00 |
Overall L1 | 16.67 |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 33% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 100% |
all | 3% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 14% |
all | 9% |
load | 11% |
store | 9% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 9% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 6% |
all | 16% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 25% |
all | 9% |
load | 11% |
store | 9% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 9% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 8% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RDI,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB $0x88,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x40(%RDI),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x44(%RDI),%ECX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x38(%RDI),%EDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x3c(%RBX),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD $0x2,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA 0x1(%RAX),%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
LEA 0x1(%RDX),%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %ECX,-0x80(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %EDI,-0x84(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %ECX,%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 45b8b0 <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_._omp_fn.0.lto_priv.0+0x480> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %ECX,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0x2(%RSI),%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB %R15D,%R13D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %R14D,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 45b8b0 <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_._omp_fn.0.lto_priv.0+0x480> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R14D,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %EDI,%R8D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R8D,-0x7c(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 4046c0 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %EAX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 4045b0 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %EAX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV -0x7c(%RBP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
IMUL %R13D,%EAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
DIV %R12D | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
MOV %EAX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %EDX,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB 45b8ed <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_._omp_fn.0.lto_priv.0+0x4bd> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %R12D,%ECX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %EDX,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA (%R12,%RCX,1),%R9D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %R9D,-0x88(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %R9D,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 45b8b0 <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_._omp_fn.0.lto_priv.0+0x480> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %ECX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x84(%RBP),%R10D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x30(%RBX),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
DIVL -0x7c(%RBP) | 5 | 0 | 3 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 11-16 | 6 |
MOV 0x8(%RBX),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RBX),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVQ 0x7592(%RIP),%XMM8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x80e2(%RIP),%XMM10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RDI,-0x98(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVQ 0x819b(%RIP),%XMM12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x70ab(%RIP),%XMM11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R13,-0xa8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R8,-0xb0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VXORPD %XMM5,%XMM5,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA (%RDX,%R10,1),%R9D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %R14D,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV (%RBX),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RAX,%R15,1),%R11D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV 0x28(%RBX),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R11D,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB %R9D,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R11D,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R14,-0xa0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%RBX),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%RBX),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R15,-0x90(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVSXD %R11D,%R15 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV %RBX,-0x68(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD $0x88,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
INC %R12D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 45b4b2 <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_._omp_fn.0.lto_priv.0+0x82> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | viscosity.cpp:36-64 |
Module | exec |
nb instructions | 91 |
nb uops | 100 |
loop length | 362 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 5 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 11 |
micro-operation queue | 16.67 cycles |
front end | 16.67 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 6.10 | 8.00 | 8.33 | 8.33 | 9.50 | 6.07 | 5.90 | 9.50 | 9.50 | 9.50 | 5.93 | 8.33 |
cycles | 6.10 | 11.93 | 8.33 | 8.33 | 9.50 | 6.07 | 5.90 | 9.50 | 9.50 | 9.50 | 5.93 | 8.33 |
Cycles executing div or sqrt instructions | 12.00 |
FE+BE cycles | 16.09-16.18 |
Stall cycles | 0.00 |
Front-end | 16.67 |
Dispatch | 11.93 |
DIV/SQRT | 12.00 |
Overall L1 | 16.67 |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 33% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 100% |
all | 3% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 14% |
all | 9% |
load | 11% |
store | 9% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 9% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 6% |
all | 16% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 25% |
all | 9% |
load | 11% |
store | 9% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 9% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 8% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RDI,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB $0x88,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x40(%RDI),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x44(%RDI),%ECX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x38(%RDI),%EDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x3c(%RBX),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD $0x2,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA 0x1(%RAX),%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
LEA 0x1(%RDX),%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %ECX,-0x80(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %EDI,-0x84(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %ECX,%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 45b8b0 <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_._omp_fn.0.lto_priv.0+0x480> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %ECX,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0x2(%RSI),%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB %R15D,%R13D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %R14D,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 45b8b0 <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_._omp_fn.0.lto_priv.0+0x480> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R14D,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %EDI,%R8D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R8D,-0x7c(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 4046c0 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %EAX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 4045b0 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %EAX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV -0x7c(%RBP),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
IMUL %R13D,%EAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
DIV %R12D | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
MOV %EAX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %EDX,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB 45b8ed <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_._omp_fn.0.lto_priv.0+0x4bd> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %R12D,%ECX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %EDX,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA (%R12,%RCX,1),%R9D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %R9D,-0x88(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %R9D,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 45b8b0 <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_._omp_fn.0.lto_priv.0+0x480> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %ECX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x84(%RBP),%R10D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x30(%RBX),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
DIVL -0x7c(%RBP) | 5 | 0 | 3 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 11-16 | 6 |
MOV 0x8(%RBX),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RBX),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVQ 0x7592(%RIP),%XMM8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x80e2(%RIP),%XMM10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RDI,-0x98(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVQ 0x819b(%RIP),%XMM12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x70ab(%RIP),%XMM11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R13,-0xa8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R8,-0xb0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VXORPD %XMM5,%XMM5,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA (%RDX,%R10,1),%R9D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %R14D,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV (%RBX),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RAX,%R15,1),%R11D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV 0x28(%RBX),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R11D,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB %R9D,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R11D,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R14,-0xa0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%RBX),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%RBX),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R15,-0x90(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVSXD %R11D,%R15 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV %RBX,-0x68(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD $0x88,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
INC %R12D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 45b4b2 <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_._omp_fn.0.lto_priv.0+0x82> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_._omp_fn.0.lto_priv.0– | 2.71 | 2.03 |
▼Loop 505 - viscosity.cpp:38-64 - exec– | 0 | 0 |
○Loop 506 - viscosity.cpp:39-64 - exec | 2.71 | 2.02 |