Function: hypre_BoomerAMGBuildMultipass.extracted.27 | Module: libparcsr_ls.so | Source: par_multi_interp.c:1575-1663 [...] | Coverage: 0.36% |
---|
Function: hypre_BoomerAMGBuildMultipass.extracted.27 | Module: libparcsr_ls.so | Source: par_multi_interp.c:1575-1663 [...] | Coverage: 0.36% |
---|
/scratch_na/users/xoserete/qaas_runs/171-415-3872/intel/AMG/build/AMG/AMG/parcsr_ls/par_multi_interp.c: 1575 - 1663 |
-------------------------------------------------------------------------------- |
1575: #pragma omp parallel private(thread_start,thread_stop,my_thread_num,num_threads,k,k1,i,i1,j,j1,sum_C,sum_N,j_start,j_end,cnt,tmp_marker,tmp_marker_offd,cnt_offd,diagonal,alfa) |
[...] |
1585: if (n_fine) |
1586: { tmp_marker = hypre_CTAlloc(HYPRE_Int,n_fine); } |
1587: tmp_marker_offd = NULL; |
1588: if (num_cols_offd) |
1589: { tmp_marker_offd = hypre_CTAlloc(HYPRE_Int,num_cols_offd); } |
1590: for (i=0; i < n_fine; i++) |
1591: { tmp_marker[i] = -1; } |
1592: for (i=0; i < num_cols_offd; i++) |
1593: { tmp_marker_offd[i] = -1; } |
1594: |
1595: /* Compute this thread's range of pass_length */ |
1596: my_thread_num = hypre_GetThreadNum(); |
1597: num_threads = hypre_NumActiveThreads(); |
1598: thread_start = pass_pointer[1] + (pass_length/num_threads)*my_thread_num; |
1599: if (my_thread_num == num_threads-1) |
[...] |
1605: for (i=thread_start; i < thread_stop; i++) |
1606: { |
1607: i1 = pass_array[i]; |
1608: sum_C = 0; |
1609: sum_N = 0; |
1610: j_start = P_diag_start[i1]; |
1611: j_end = j_start+P_diag_i[i1+1]-P_diag_i[i1]; |
1612: for (j=j_start; j < j_end; j++) |
1613: { |
1614: k1 = P_diag_pass[1][j]; |
1615: tmp_marker[C_array[k1]] = i1; |
1616: } |
1617: cnt = P_diag_i[i1]; |
1618: for (j=A_diag_i[i1]+1; j < A_diag_i[i1+1]; j++) |
1619: { |
1620: j1 = A_diag_j[j]; |
1621: if (CF_marker[j1] != -3 && |
1622: (num_functions == 1 || dof_func[i1] == dof_func[j1])) |
1623: sum_N += A_diag_data[j]; |
1624: if (j1 != -1 && tmp_marker[j1] == i1) |
1625: { |
1626: P_diag_data[cnt] = A_diag_data[j]; |
1627: P_diag_j[cnt++] = fine_to_coarse[j1]; |
1628: sum_C += A_diag_data[j]; |
1629: } |
1630: } |
1631: j_start = P_offd_start[i1]; |
1632: j_end = j_start+P_offd_i[i1+1]-P_offd_i[i1]; |
1633: for (j=j_start; j < j_end; j++) |
1634: { |
1635: k1 = P_offd_pass[1][j]; |
1636: tmp_marker_offd[C_array_offd[k1]] = i1; |
1637: } |
1638: cnt_offd = P_offd_i[i1]; |
1639: for (j=A_offd_i[i1]; j < A_offd_i[i1+1]; j++) |
1640: { |
1641: if (col_offd_S_to_A) |
1642: j1 = map_A_to_S[A_offd_j[j]]; |
1643: else |
1644: j1 = A_offd_j[j]; |
1645: if (CF_marker_offd[j1] != -3 && |
1646: (num_functions == 1 || dof_func[i1] == dof_func_offd[j1])) |
1647: sum_N += A_offd_data[j]; |
1648: if (j1 != -1 && tmp_marker_offd[j1] == i1) |
1649: { |
1650: P_offd_data[cnt_offd] = A_offd_data[j]; |
1651: P_offd_j[cnt_offd++] = map_S_to_new[j1]; |
1652: sum_C += A_offd_data[j]; |
1653: } |
1654: } |
1655: diagonal = A_diag_data[A_diag_i[i1]]; |
1656: if (sum_C*diagonal) alfa = -sum_N/(sum_C*diagonal); |
1657: for (j=P_diag_i[i1]; j < cnt; j++) |
1658: P_diag_data[j] *= alfa; |
1659: for (j=P_offd_i[i1]; j < cnt_offd; j++) |
1660: P_offd_data[j] *= alfa; |
1661: } |
1662: hypre_TFree(tmp_marker); |
1663: hypre_TFree(tmp_marker_offd); |
0x549a0 PUSH %RBP |
0x549a1 MOV %RSP,%RBP |
0x549a4 PUSH %R15 |
0x549a6 PUSH %R14 |
0x549a8 PUSH %R13 |
0x549aa PUSH %R12 |
0x549ac PUSH %RBX |
0x549ad SUB $0xf8,%RSP |
0x549b4 MOV %R9,%R12 |
0x549b7 MOV %R8,-0x90(%RBP) |
0x549be MOV %RCX,-0xa0(%RBP) |
0x549c5 MOV %RDX,-0xe8(%RBP) |
0x549cc MOV 0xe8(%RBP),%RAX |
0x549d3 MOV %RAX,-0x38(%RBP) |
0x549d7 MOV 0xe0(%RBP),%RAX |
0x549de MOV %RAX,-0xe0(%RBP) |
0x549e5 MOV 0xd8(%RBP),%RCX |
0x549ec MOV 0xd0(%RBP),%RAX |
0x549f3 MOV %RAX,-0x120(%RBP) |
0x549fa MOV 0xc8(%RBP),%RAX |
0x54a01 MOV %RAX,-0x118(%RBP) |
0x54a08 MOV 0xc0(%RBP),%RAX |
0x54a0f MOV %RAX,-0xd8(%RBP) |
0x54a16 MOV 0xb8(%RBP),%RAX |
0x54a1d MOV %RAX,-0xd0(%RBP) |
0x54a24 MOV 0xb0(%RBP),%RAX |
0x54a2b MOV %RAX,-0xa8(%RBP) |
0x54a32 MOV 0xa8(%RBP),%RAX |
0x54a39 MOV %RAX,-0xf8(%RBP) |
0x54a40 MOV 0xa0(%RBP),%RAX |
0x54a47 MOV %RAX,-0x30(%RBP) |
0x54a4b MOV 0x98(%RBP),%RAX |
0x54a52 MOV %RAX,-0x60(%RBP) |
0x54a56 MOV 0x90(%RBP),%RAX |
0x54a5d MOV %RAX,-0xc8(%RBP) |
0x54a64 MOV 0x88(%RBP),%RAX |
0x54a6b MOV %RAX,-0xc0(%RBP) |
0x54a72 MOV 0x80(%RBP),%RAX |
0x54a79 MOV %RAX,-0x100(%RBP) |
0x54a80 MOV 0x78(%RBP),%RBX |
0x54a84 MOV 0x70(%RBP),%RAX |
0x54a88 MOV %RAX,-0x110(%RBP) |
0x54a8f MOV 0x68(%RBP),%RAX |
0x54a93 MOV %RAX,-0x40(%RBP) |
0x54a97 MOV 0x60(%RBP),%RAX |
0x54a9b MOV %RAX,-0x80(%RBP) |
0x54a9f MOV 0x58(%RBP),%RAX |
0x54aa3 MOV %RAX,-0x108(%RBP) |
0x54aaa MOV 0x50(%RBP),%RAX |
0x54aae MOV %RAX,-0x48(%RBP) |
0x54ab2 MOV 0x48(%RBP),%RAX |
0x54ab6 MOV %RAX,-0x78(%RBP) |
0x54aba MOV 0x40(%RBP),%RDX |
0x54abe MOV 0x38(%RBP),%RAX |
0x54ac2 MOV %RAX,-0xb8(%RBP) |
0x54ac9 MOV 0x30(%RBP),%RAX |
0x54acd MOV %RAX,-0x88(%RBP) |
0x54ad4 MOV 0x28(%RBP),%RAX |
0x54ad8 MOV %RAX,-0x98(%RBP) |
0x54adf MOV 0x20(%RBP),%RAX |
0x54ae3 MOV %RAX,-0x58(%RBP) |
0x54ae7 MOV 0x18(%RBP),%RAX |
0x54aeb MOV %RAX,-0x68(%RBP) |
0x54aef MOV 0x10(%RBP),%RAX |
0x54af3 MOV %RAX,-0x70(%RBP) |
0x54af7 MOV %RCX,%R15 |
0x54afa MOV (%RCX),%RDI |
0x54afd TEST %RDI,%RDI |
0x54b00 JE 54b17 |
0x54b02 MOV $0x8,%ESI |
0x54b07 MOV %RDX,%R14 |
0x54b0a CALL e870 <hypre_CAlloc@plt> |
0x54b0f MOV %R14,%RDX |
0x54b12 MOV %RAX,%R13 |
0x54b15 JMP 54b1a |
0x54b17 XOR %R13D,%R13D |
0x54b1a MOV (%RDX),%RDI |
0x54b1d TEST %RDI,%RDI |
0x54b20 JE 54b3d |
0x54b22 MOV $0x8,%ESI |
0x54b27 MOV %RDX,%R14 |
0x54b2a CALL e870 <hypre_CAlloc@plt> |
0x54b2f MOV %R14,%RDX |
0x54b32 MOV %RAX,%R14 |
0x54b35 CMPQ $0,(%R15) |
0x54b39 JG 54b46 |
0x54b3b JMP 54b61 |
0x54b3d XOR %R14D,%R14D |
0x54b40 CMPQ $0,(%R15) |
0x54b44 JLE 54b61 |
0x54b46 XOR %EAX,%EAX |
0x54b48 NOPL (%RAX,%RAX,1) |
(1512) 0x54b50 MOVQ $-0x1,(%R13,%RAX,8) |
(1512) 0x54b59 INC %RAX |
(1512) 0x54b5c CMP (%R15),%RAX |
(1512) 0x54b5f JL 54b50 |
0x54b61 CMPQ $0,(%RDX) |
0x54b65 JLE 54b80 |
0x54b67 XOR %EAX,%EAX |
0x54b69 NOPL (%RAX) |
(1511) 0x54b70 MOVQ $-0x1,(%R14,%RAX,8) |
(1511) 0x54b78 INC %RAX |
(1511) 0x54b7b CMP (%RDX),%RAX |
(1511) 0x54b7e JL 54b70 |
0x54b80 CALL def0 <hypre_GetThreadNum@plt> |
0x54b85 MOV %RAX,%R15 |
0x54b88 CALL e6c0 <hypre_NumActiveThreads@plt> |
0x54b8d MOV %RAX,%RCX |
0x54b90 MOV -0x30(%RBP),%RAX |
0x54b94 MOV (%RAX),%RAX |
0x54b97 MOV 0x8(%RAX),%RDI |
0x54b9b MOV -0x38(%RBP),%RAX |
0x54b9f MOV (%RAX),%RSI |
0x54ba2 MOV %RSI,%RAX |
0x54ba5 OR %RCX,%RAX |
0x54ba8 SHR $0x20,%RAX |
0x54bac JE 54bb8 |
0x54bae MOV %RSI,%RAX |
0x54bb1 CQTO |
0x54bb3 IDIV %RCX |
0x54bb6 JMP 54bbe |
0x54bb8 MOV %ESI,%EAX |
0x54bba XOR %EDX,%EDX |
0x54bbc DIV %ECX |
0x54bbe MOV -0x58(%RBP),%R8 |
0x54bc2 MOV %RAX,%R11 |
0x54bc5 IMUL %R15,%R11 |
0x54bc9 DEC %RCX |
0x54bcc LEA 0x1(%R15),%RDX |
0x54bd0 IMUL %RAX,%RDX |
0x54bd4 CMP %RCX,%R15 |
0x54bd7 CMOVE %RSI,%RDX |
0x54bdb MOV %RDX,-0x50(%RBP) |
0x54bdf CMP %RDX,%R11 |
0x54be2 JGE 55137 |
0x54be8 MOV -0x50(%RBP),%RAX |
0x54bec ADD %RDI,%RAX |
0x54bef MOV %RAX,-0x50(%RBP) |
0x54bf3 ADD %RDI,%R11 |
0x54bf6 MOV -0x60(%RBP),%RAX |
0x54bfa MOV (%RAX),%RAX |
0x54bfd MOV %RAX,-0xf0(%RBP) |
0x54c04 MOV -0x48(%RBP),%RAX |
0x54c08 MOV (%RAX),%RAX |
0x54c0b MOV %RAX,-0x38(%RBP) |
0x54c0f MOV -0xa8(%RBP),%RAX |
0x54c16 MOV (%RAX),%RAX |
0x54c19 MOV %RAX,-0x48(%RBP) |
0x54c1d MOV -0x40(%RBP),%RAX |
0x54c21 MOV (%RAX),%RAX |
0x54c24 MOV %RAX,-0x30(%RBP) |
0x54c28 MOV -0x70(%RBP),%RAX |
0x54c2c MOV (%RAX),%RAX |
0x54c2f MOV %RAX,-0x40(%RBP) |
0x54c33 VXORPD %XMM0,%XMM0,%XMM0 |
0x54c37 VMOVDDUP 0x4d7e9(%RIP),%XMM1 |
0x54c3f MOV %R12,-0xb0(%RBP) |
0x54c46 JMP 54c61 |
0x54c48 NOPL (%RAX,%RAX,1) |
(1500) 0x54c50 INC %R11 |
(1500) 0x54c53 CMP -0x50(%RBP),%R11 |
(1500) 0x54c57 MOV -0x58(%RBP),%R8 |
(1500) 0x54c5b JGE 55137 |
(1500) 0x54c61 MOV %R11,-0x60(%RBP) |
(1500) 0x54c65 MOV -0xf0(%RBP),%RAX |
(1500) 0x54c6c MOV (%RAX,%R11,8),%R10 |
(1500) 0x54c70 MOV -0xf8(%RBP),%RAX |
(1500) 0x54c77 MOV (%RAX,%R10,8),%RDI |
(1500) 0x54c7b MOV -0x38(%RBP),%RAX |
(1500) 0x54c7f MOV (%RAX,%R10,8),%RCX |
(1500) 0x54c83 MOV 0x8(%RAX,%R10,8),%RAX |
(1500) 0x54c88 LEA (%RAX,%RDI,1),%RDX |
(1500) 0x54c8c SUB %RCX,%RDX |
(1500) 0x54c8f CMP %RDX,%RDI |
(1500) 0x54c92 JGE 54d65 |
(1500) 0x54c98 MOV -0xd0(%RBP),%RDX |
(1500) 0x54c9f MOV (%RDX),%RDX |
(1500) 0x54ca2 MOV 0x8(%RDX),%R9 |
(1500) 0x54ca6 MOV -0xc0(%RBP),%RDX |
(1500) 0x54cad MOV (%RDX),%RSI |
(1500) 0x54cb0 SUB %RCX,%RAX |
(1500) 0x54cb3 CMP $0x8,%RAX |
(1500) 0x54cb7 JB 54d40 |
(1500) 0x54cbd MOV %RAX,%R11 |
(1500) 0x54cc0 SHR $0x3,%R11 |
(1500) 0x54cc4 LEA 0x38(%R9,%RDI,8),%RCX |
(1500) 0x54cc9 NOPL (%RAX) |
(1510) 0x54cd0 MOV -0x38(%RCX),%RDX |
(1510) 0x54cd4 MOV (%RSI,%RDX,8),%RDX |
(1510) 0x54cd8 MOV %R10,(%R13,%RDX,8) |
(1510) 0x54cdd MOV -0x30(%RCX),%RDX |
(1510) 0x54ce1 MOV (%RSI,%RDX,8),%RDX |
(1510) 0x54ce5 MOV %R10,(%R13,%RDX,8) |
(1510) 0x54cea MOV -0x28(%RCX),%RDX |
(1510) 0x54cee MOV (%RSI,%RDX,8),%RDX |
(1510) 0x54cf2 MOV %R10,(%R13,%RDX,8) |
(1510) 0x54cf7 MOV -0x20(%RCX),%RDX |
(1510) 0x54cfb MOV (%RSI,%RDX,8),%RDX |
(1510) 0x54cff MOV %R10,(%R13,%RDX,8) |
(1510) 0x54d04 MOV -0x18(%RCX),%RDX |
(1510) 0x54d08 MOV (%RSI,%RDX,8),%RDX |
(1510) 0x54d0c MOV %R10,(%R13,%RDX,8) |
(1510) 0x54d11 MOV -0x10(%RCX),%RDX |
(1510) 0x54d15 MOV (%RSI,%RDX,8),%RDX |
(1510) 0x54d19 MOV %R10,(%R13,%RDX,8) |
(1510) 0x54d1e MOV -0x8(%RCX),%RDX |
(1510) 0x54d22 MOV (%RSI,%RDX,8),%RDX |
(1510) 0x54d26 MOV %R10,(%R13,%RDX,8) |
(1510) 0x54d2b MOV (%RCX),%RDX |
(1510) 0x54d2e MOV (%RSI,%RDX,8),%RDX |
(1510) 0x54d32 MOV %R10,(%R13,%RDX,8) |
(1510) 0x54d37 ADD $0x40,%RCX |
(1510) 0x54d3b DEC %R11 |
(1510) 0x54d3e JNE 54cd0 |
(1500) 0x54d40 MOV %RAX,%RCX |
(1500) 0x54d43 AND $-0x8,%RCX |
(1500) 0x54d47 CMP %RAX,%RCX |
(1500) 0x54d4a JE 54d65 |
(1500) 0x54d4c LEA (%R9,%RDI,8),%RDI |
(1509) 0x54d50 MOV (%RDI,%RCX,8),%RDX |
(1509) 0x54d54 MOV (%RSI,%RDX,8),%RDX |
(1509) 0x54d58 MOV %R10,(%R13,%RDX,8) |
(1509) 0x54d5d INC %RCX |
(1509) 0x54d60 CMP %RCX,%RAX |
(1509) 0x54d63 JNE 54d50 |
(1500) 0x54d65 MOV -0x38(%RBP),%RAX |
(1500) 0x54d69 MOV (%RAX,%R10,8),%R11 |
(1500) 0x54d6d MOV -0x68(%RBP),%RCX |
(1500) 0x54d71 MOV (%RCX,%R10,8),%RAX |
(1500) 0x54d75 MOV 0x8(%RCX,%R10,8),%RSI |
(1500) 0x54d7a INC %RAX |
(1500) 0x54d7d VXORPD %XMM4,%XMM4,%XMM4 |
(1500) 0x54d81 VXORPD %XMM3,%XMM3,%XMM3 |
(1500) 0x54d85 CMP %RSI,%RAX |
(1500) 0x54d88 JGE 54e30 |
(1500) 0x54d8e MOV -0xe8(%RBP),%RDI |
(1500) 0x54d95 MOV -0xe0(%RBP),%R9 |
(1500) 0x54d9c MOV -0x90(%RBP),%R15 |
(1500) 0x54da3 JMP 54db8 |
0x54da5 NOPW %CS:(%RAX,%RAX,1) |
(1508) 0x54db0 INC %RAX |
(1508) 0x54db3 CMP %RSI,%RAX |
(1508) 0x54db6 JGE 54e30 |
(1508) 0x54db8 MOV (%R8,%RAX,8),%RCX |
(1508) 0x54dbc CMPQ $-0x3,(%RDI,%RCX,8) |
(1508) 0x54dc1 JE 54de3 |
(1508) 0x54dc3 CMPQ $0x1,-0xa0(%RBP) |
(1508) 0x54dcb JE 54dd7 |
(1508) 0x54dcd MOV (%R15,%R10,8),%RDX |
(1508) 0x54dd1 CMP (%R15,%RCX,8),%RDX |
(1508) 0x54dd5 JNE 54de3 |
(1508) 0x54dd7 MOV -0x70(%RBP),%RDX |
(1508) 0x54ddb MOV (%RDX),%RDX |
(1508) 0x54dde VADDSD (%RDX,%RAX,8),%XMM3,%XMM3 |
(1508) 0x54de3 CMP $-0x1,%RCX |
(1508) 0x54de7 JE 54db0 |
(1508) 0x54de9 CMP %R10,(%R13,%RCX,8) |
(1508) 0x54dee JNE 54db0 |
(1508) 0x54df0 MOV -0x70(%RBP),%RDX |
(1508) 0x54df4 MOV (%RDX),%RDX |
(1508) 0x54df7 VMOVSD (%RDX,%RAX,8),%XMM5 |
(1508) 0x54dfc MOV -0x78(%RBP),%RDX |
(1508) 0x54e00 MOV (%RDX),%RDX |
(1508) 0x54e03 VMOVSD %XMM5,(%RDX,%R11,8) |
(1508) 0x54e09 MOV (%R9,%RCX,8),%RCX |
(1508) 0x54e0d MOV -0x108(%RBP),%RDX |
(1508) 0x54e14 MOV %RCX,(%RDX,%R11,8) |
(1508) 0x54e18 INC %R11 |
(1508) 0x54e1b VADDSD %XMM4,%XMM5,%XMM4 |
(1508) 0x54e1f MOV -0x68(%RBP),%RCX |
(1508) 0x54e23 MOV 0x8(%RCX,%R10,8),%RSI |
(1508) 0x54e28 JMP 54db0 |
0x54e2a NOPW (%RAX,%RAX,1) |
(1500) 0x54e30 MOV -0x48(%RBP),%RAX |
(1500) 0x54e34 MOV (%RAX,%R10,8),%RDI |
(1500) 0x54e38 MOV -0x30(%RBP),%RCX |
(1500) 0x54e3c MOV (%RCX,%R10,8),%RAX |
(1500) 0x54e40 MOV 0x8(%RCX,%R10,8),%RCX |
(1500) 0x54e45 LEA (%RCX,%RDI,1),%RDX |
(1500) 0x54e49 SUB %RAX,%RDX |
(1500) 0x54e4c CMP %RDX,%RDI |
(1500) 0x54e4f JGE 54f24 |
(1500) 0x54e55 MOV -0xd8(%RBP),%RDX |
(1500) 0x54e5c MOV (%RDX),%RDX |
(1500) 0x54e5f MOV 0x8(%RDX),%R9 |
(1500) 0x54e63 MOV -0xc8(%RBP),%RDX |
(1500) 0x54e6a MOV (%RDX),%RSI |
(1500) 0x54e6d SUB %RAX,%RCX |
(1500) 0x54e70 CMP $0x8,%RCX |
(1500) 0x54e74 JB 54ef8 |
(1500) 0x54e7a MOV %RCX,%RAX |
(1500) 0x54e7d SHR $0x3,%RAX |
(1500) 0x54e81 LEA 0x38(%R9,%RDI,8),%R8 |
(1500) 0x54e86 NOPW %CS:(%RAX,%RAX,1) |
(1507) 0x54e90 MOV -0x38(%R8),%RDX |
(1507) 0x54e94 MOV (%RSI,%RDX,8),%RDX |
(1507) 0x54e98 MOV %R10,(%R14,%RDX,8) |
(1507) 0x54e9c MOV -0x30(%R8),%RDX |
(1507) 0x54ea0 MOV (%RSI,%RDX,8),%RDX |
(1507) 0x54ea4 MOV %R10,(%R14,%RDX,8) |
(1507) 0x54ea8 MOV -0x28(%R8),%RDX |
(1507) 0x54eac MOV (%RSI,%RDX,8),%RDX |
(1507) 0x54eb0 MOV %R10,(%R14,%RDX,8) |
(1507) 0x54eb4 MOV -0x20(%R8),%RDX |
(1507) 0x54eb8 MOV (%RSI,%RDX,8),%RDX |
(1507) 0x54ebc MOV %R10,(%R14,%RDX,8) |
(1507) 0x54ec0 MOV -0x18(%R8),%RDX |
(1507) 0x54ec4 MOV (%RSI,%RDX,8),%RDX |
(1507) 0x54ec8 MOV %R10,(%R14,%RDX,8) |
(1507) 0x54ecc MOV -0x10(%R8),%RDX |
(1507) 0x54ed0 MOV (%RSI,%RDX,8),%RDX |
(1507) 0x54ed4 MOV %R10,(%R14,%RDX,8) |
(1507) 0x54ed8 MOV -0x8(%R8),%RDX |
(1507) 0x54edc MOV (%RSI,%RDX,8),%RDX |
(1507) 0x54ee0 MOV %R10,(%R14,%RDX,8) |
(1507) 0x54ee4 MOV (%R8),%RDX |
(1507) 0x54ee7 MOV (%RSI,%RDX,8),%RDX |
(1507) 0x54eeb MOV %R10,(%R14,%RDX,8) |
(1507) 0x54eef ADD $0x40,%R8 |
(1507) 0x54ef3 DEC %RAX |
(1507) 0x54ef6 JNE 54e90 |
(1500) 0x54ef8 MOV %RCX,%RAX |
(1500) 0x54efb AND $-0x8,%RAX |
(1500) 0x54eff CMP %RCX,%RAX |
(1500) 0x54f02 JE 54f24 |
(1500) 0x54f04 LEA (%R9,%RDI,8),%RDI |
(1500) 0x54f08 NOPL (%RAX,%RAX,1) |
(1506) 0x54f10 MOV (%RDI,%RAX,8),%RDX |
(1506) 0x54f14 MOV (%RSI,%RDX,8),%RDX |
(1506) 0x54f18 MOV %R10,(%R14,%RDX,8) |
(1506) 0x54f1c INC %RAX |
(1506) 0x54f1f CMP %RAX,%RCX |
(1506) 0x54f22 JNE 54f10 |
(1500) 0x54f24 MOV -0x30(%RBP),%RAX |
(1500) 0x54f28 MOV (%RAX,%R10,8),%RAX |
(1500) 0x54f2c MOV -0x88(%RBP),%RCX |
(1500) 0x54f33 MOV (%RCX,%R10,8),%RSI |
(1500) 0x54f37 MOV 0x8(%RCX,%R10,8),%RDI |
(1500) 0x54f3c CMP %RDI,%RSI |
(1500) 0x54f3f JGE 55020 |
(1500) 0x54f45 MOV -0xb8(%RBP),%RCX |
(1500) 0x54f4c LEA (%RCX,%RSI,8),%R9 |
(1500) 0x54f50 JMP 54f70 |
0x54f52 NOPW %CS:(%RAX,%RAX,1) |
(1505) 0x54f60 INC %RSI |
(1505) 0x54f63 ADD $0x8,%R9 |
(1505) 0x54f67 CMP %RDI,%RSI |
(1505) 0x54f6a JGE 55020 |
(1505) 0x54f70 MOV %R9,%RCX |
(1505) 0x54f73 TEST %R12,%R12 |
(1505) 0x54f76 JE 54f86 |
(1505) 0x54f78 MOV (%R9),%RCX |
(1505) 0x54f7b MOV -0x120(%RBP),%RDX |
(1505) 0x54f82 LEA (%RDX,%RCX,8),%RCX |
(1505) 0x54f86 MOV (%RCX),%RCX |
(1505) 0x54f89 CMPQ $-0x3,(%RBX,%RCX,8) |
(1505) 0x54f8e JE 54fc4 |
(1505) 0x54f90 CMPQ $0x1,-0xa0(%RBP) |
(1505) 0x54f98 JE 54fb8 |
(1505) 0x54f9a MOV -0x90(%RBP),%RDX |
(1505) 0x54fa1 MOV (%RDX,%R10,8),%RDX |
(1505) 0x54fa5 MOV %RBX,%R15 |
(1505) 0x54fa8 MOV -0x100(%RBP),%RBX |
(1505) 0x54faf CMP (%RBX,%RCX,8),%RDX |
(1505) 0x54fb3 MOV %R15,%RBX |
(1505) 0x54fb6 JNE 54fc4 |
(1505) 0x54fb8 MOV -0x98(%RBP),%RDX |
(1505) 0x54fbf VADDSD (%RDX,%RSI,8),%XMM3,%XMM3 |
(1505) 0x54fc4 CMP $-0x1,%RCX |
(1505) 0x54fc8 JE 54f60 |
(1505) 0x54fca CMP %R10,(%R14,%RCX,8) |
(1505) 0x54fce JNE 54f60 |
(1505) 0x54fd0 MOV -0x98(%RBP),%RDX |
(1505) 0x54fd7 VMOVSD (%RDX,%RSI,8),%XMM5 |
(1505) 0x54fdc MOV -0x80(%RBP),%RDX |
(1505) 0x54fe0 MOV (%RDX),%RDX |
(1505) 0x54fe3 VMOVSD %XMM5,(%RDX,%RAX,8) |
(1505) 0x54fe8 MOV -0x118(%RBP),%RDX |
(1505) 0x54fef MOV (%RDX,%RCX,8),%RCX |
(1505) 0x54ff3 MOV -0x110(%RBP),%RDX |
(1505) 0x54ffa MOV %RCX,(%RDX,%RAX,8) |
(1505) 0x54ffe INC %RAX |
(1505) 0x55001 VADDSD %XMM4,%XMM5,%XMM4 |
(1505) 0x55005 MOV -0x88(%RBP),%RCX |
(1505) 0x5500c MOV 0x8(%RCX,%R10,8),%RDI |
(1505) 0x55011 JMP 54f60 |
0x55016 NOPW %CS:(%RAX,%RAX,1) |
(1500) 0x55020 MOV -0x68(%RBP),%RCX |
(1500) 0x55024 MOV (%RCX,%R10,8),%RCX |
(1500) 0x55028 MOV -0x40(%RBP),%RDX |
(1500) 0x5502c VMULSD (%RDX,%RCX,8),%XMM4,%XMM4 |
(1500) 0x55031 VUCOMISD %XMM0,%XMM4 |
(1500) 0x55035 JE 5503f |
(1500) 0x55037 VXORPD %XMM1,%XMM3,%XMM2 |
(1500) 0x5503b VDIVSD %XMM4,%XMM2,%XMM2 |
(1500) 0x5503f MOV -0x38(%RBP),%RCX |
(1500) 0x55043 MOV (%RCX,%R10,8),%RCX |
(1500) 0x55047 MOV %R11,%R8 |
(1500) 0x5504a SUB %RCX,%R8 |
(1500) 0x5504d JLE 550b2 |
(1500) 0x5504f MOV -0x78(%RBP),%RDX |
(1500) 0x55053 MOV (%RDX),%RSI |
(1500) 0x55056 MOV %R8,%RDI |
(1500) 0x55059 AND $-0x4,%RDI |
(1500) 0x5505d JE 55093 |
(1500) 0x5505f LEA -0x1(%RDI),%R9 |
(1500) 0x55063 VBROADCASTSD %XMM2,%YMM3 |
(1500) 0x55068 LEA (%RSI,%RCX,8),%RDX |
(1500) 0x5506c XOR %R12D,%R12D |
(1500) 0x5506f NOP |
(1504) 0x55070 VMULPD (%RDX,%R12,8),%YMM3,%YMM4 |
(1504) 0x55076 VMOVUPD %YMM4,(%RDX,%R12,8) |
(1504) 0x5507c ADD $0x4,%R12 |
(1504) 0x55080 CMP %R9,%R12 |
(1504) 0x55083 JBE 55070 |
(1500) 0x55085 CMP %RDI,%R8 |
(1500) 0x55088 MOV -0xb0(%RBP),%R12 |
(1500) 0x5508f JNE 55095 |
(1500) 0x55091 JMP 550b2 |
(1500) 0x55093 XOR %EDI,%EDI |
(1500) 0x55095 ADD %RCX,%RDI |
(1500) 0x55098 NOPL (%RAX,%RAX,1) |
(1503) 0x550a0 VMULSD (%RSI,%RDI,8),%XMM2,%XMM3 |
(1503) 0x550a5 VMOVSD %XMM3,(%RSI,%RDI,8) |
(1503) 0x550aa INC %RDI |
(1503) 0x550ad CMP %RDI,%R11 |
(1503) 0x550b0 JNE 550a0 |
(1500) 0x550b2 MOV -0x30(%RBP),%RCX |
(1500) 0x550b6 MOV (%RCX,%R10,8),%RCX |
(1500) 0x550ba MOV %RAX,%R8 |
(1500) 0x550bd SUB %RCX,%R8 |
(1500) 0x550c0 MOV -0x60(%RBP),%R11 |
(1500) 0x550c4 JLE 54c50 |
(1500) 0x550ca MOV -0x80(%RBP),%RDX |
(1500) 0x550ce MOV (%RDX),%RSI |
(1500) 0x550d1 MOV %R8,%RDI |
(1500) 0x550d4 AND $-0x4,%RDI |
(1500) 0x550d8 JE 55110 |
(1500) 0x550da LEA -0x1(%RDI),%R9 |
(1500) 0x550de VBROADCASTSD %XMM2,%YMM3 |
(1500) 0x550e3 LEA (%RSI,%RCX,8),%RDX |
(1500) 0x550e7 XOR %R10D,%R10D |
(1500) 0x550ea NOPW (%RAX,%RAX,1) |
(1502) 0x550f0 VMULPD (%RDX,%R10,8),%YMM3,%YMM4 |
(1502) 0x550f6 VMOVUPD %YMM4,(%RDX,%R10,8) |
(1502) 0x550fc ADD $0x4,%R10 |
(1502) 0x55100 CMP %R9,%R10 |
(1502) 0x55103 JBE 550f0 |
(1500) 0x55105 CMP %RDI,%R8 |
(1500) 0x55108 JE 54c50 |
(1500) 0x5510e JMP 55112 |
(1500) 0x55110 XOR %EDI,%EDI |
(1500) 0x55112 ADD %RCX,%RDI |
(1500) 0x55115 NOPW %CS:(%RAX,%RAX,1) |
(1501) 0x55120 VMULSD (%RSI,%RDI,8),%XMM2,%XMM3 |
(1501) 0x55125 VMOVSD %XMM3,(%RSI,%RDI,8) |
(1501) 0x5512a INC %RDI |
(1501) 0x5512d CMP %RDI,%RAX |
(1501) 0x55130 JNE 55120 |
(1500) 0x55132 JMP 54c50 |
0x55137 MOV %R13,%RDI |
0x5513a VZEROUPPER |
0x5513d CALL e260 <hypre_Free@plt> |
0x55142 MOV %R14,%RDI |
0x55145 ADD $0xf8,%RSP |
0x5514c POP %RBX |
0x5514d POP %R12 |
0x5514f POP %R13 |
0x55151 POP %R14 |
0x55153 POP %R15 |
0x55155 POP %RBP |
0x55156 JMP e260 |
0x5515b NOPL (%RAX,%RAX,1) |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►100.00+ | __kmp_invoke_microtask | libiomp5.so | |
○ | __kmp_invoke_task_func | libiomp5.so |
Path / |
Source file and lines | par_multi_interp.c:1575-1663 |
Module | libparcsr_ls.so |
nb instructions | 168 |
nb uops | 181 |
loop length | 737 |
used x86 registers | 15 |
used mmx registers | 0 |
used xmm registers | 2 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 59 |
micro-operation queue | 30.17 cycles |
front end | 30.17 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 6.00 | 8.00 | 19.00 | 19.00 | 23.50 | 6.00 | 6.00 | 23.50 | 23.50 | 23.50 | 6.00 | 19.00 |
cycles | 6.00 | 10.80 | 19.00 | 19.00 | 23.50 | 6.00 | 6.00 | 23.50 | 23.50 | 23.50 | 6.00 | 19.00 |
Cycles executing div or sqrt instructions | 16.00 |
FE+BE cycles | 28.88-28.94 |
Stall cycles | 0.00 |
Front-end | 30.17 |
Dispatch | 23.50 |
DIV/SQRT | 16.00 |
Overall L1 | 30.17 |
all | 1% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 4% |
all | 50% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 100% |
all | 2% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 9% |
all | 12% |
load | 12% |
store | 12% |
mul | 12% |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 11% |
all | 18% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 25% |
all | 12% |
load | 12% |
store | 12% |
mul | 12% |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 9% |
other | 12% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
SUB $0xf8,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R9,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R8,-0x90(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RCX,-0xa0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDX,-0xe8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0xe8(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0xe0(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0xe0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0xd8(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0xd0(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x120(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0xc8(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x118(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0xc0(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0xd8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0xb8(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0xd0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0xb0(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0xa8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0xa8(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0xf8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0xa0(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x98(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x60(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x90(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0xc8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x88(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0xc0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x80(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x100(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x78(%RBP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x70(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x110(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x68(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x60(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x80(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x58(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x108(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x50(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x48(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x78(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x40(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x38(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0xb8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x30(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x88(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x28(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x98(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x20(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x58(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x18(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x68(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x70(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RCX,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV (%RCX),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
TEST %RDI,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JE 54b17 <hypre_BoomerAMGBuildMultipass.extracted.27+0x177> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV $0x8,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RDX,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL e870 <hypre_CAlloc@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %R14,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RAX,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
JMP 54b1a <hypre_BoomerAMGBuildMultipass.extracted.27+0x17a> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
XOR %R13D,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV (%RDX),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
TEST %RDI,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JE 54b3d <hypre_BoomerAMGBuildMultipass.extracted.27+0x19d> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV $0x8,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RDX,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL e870 <hypre_CAlloc@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %R14,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RAX,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMPQ $0,(%R15) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JG 54b46 <hypre_BoomerAMGBuildMultipass.extracted.27+0x1a6> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
JMP 54b61 <hypre_BoomerAMGBuildMultipass.extracted.27+0x1c1> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
XOR %R14D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMPQ $0,(%R15) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JLE 54b61 <hypre_BoomerAMGBuildMultipass.extracted.27+0x1c1> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMPQ $0,(%RDX) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JLE 54b80 <hypre_BoomerAMGBuildMultipass.extracted.27+0x1e0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CALL def0 <hypre_GetThreadNum@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %RAX,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL e6c0 <hypre_NumActiveThreads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %RAX,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV -0x30(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x38(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RSI,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
OR %RCX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SHR $0x20,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
JE 54bb8 <hypre_BoomerAMGBuildMultipass.extracted.27+0x218> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RSI,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CQTO | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
IDIV %RCX | 5 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 10 |
JMP 54bbe <hypre_BoomerAMGBuildMultipass.extracted.27+0x21e> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
MOV %ESI,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
DIV %ECX | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
MOV -0x58(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
IMUL %R15,%R11 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
DEC %RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0x1(%R15),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
IMUL %RAX,%RDX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
CMP %RCX,%R15 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMOVE %RSI,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RDX,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %RDX,%R11 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 55137 <hypre_BoomerAMGBuildMultipass.extracted.27+0x797> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x50(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD %RDI,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RAX,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD %RDI,%R11 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x60(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0xf0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV -0x48(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV -0xa8(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV -0x40(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV -0x70(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVDDUP 0x4d7e9(%RIP),%XMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R12,-0xb0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP 54c61 <hypre_BoomerAMGBuildMultipass.extracted.27+0x2c1> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R13,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL e260 <hypre_Free@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %R14,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
ADD $0xf8,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
JMP e260 <hypre_Free@plt> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | par_multi_interp.c:1575-1663 |
Module | libparcsr_ls.so |
nb instructions | 168 |
nb uops | 181 |
loop length | 737 |
used x86 registers | 15 |
used mmx registers | 0 |
used xmm registers | 2 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 59 |
micro-operation queue | 30.17 cycles |
front end | 30.17 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 6.00 | 8.00 | 19.00 | 19.00 | 23.50 | 6.00 | 6.00 | 23.50 | 23.50 | 23.50 | 6.00 | 19.00 |
cycles | 6.00 | 10.80 | 19.00 | 19.00 | 23.50 | 6.00 | 6.00 | 23.50 | 23.50 | 23.50 | 6.00 | 19.00 |
Cycles executing div or sqrt instructions | 16.00 |
FE+BE cycles | 28.88-28.94 |
Stall cycles | 0.00 |
Front-end | 30.17 |
Dispatch | 23.50 |
DIV/SQRT | 16.00 |
Overall L1 | 30.17 |
all | 1% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 4% |
all | 50% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 100% |
all | 2% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 9% |
all | 12% |
load | 12% |
store | 12% |
mul | 12% |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 11% |
all | 18% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 25% |
all | 12% |
load | 12% |
store | 12% |
mul | 12% |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 9% |
other | 12% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
SUB $0xf8,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R9,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R8,-0x90(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RCX,-0xa0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDX,-0xe8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0xe8(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0xe0(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0xe0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0xd8(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0xd0(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x120(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0xc8(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x118(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0xc0(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0xd8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0xb8(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0xd0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0xb0(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0xa8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0xa8(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0xf8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0xa0(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x98(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x60(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x90(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0xc8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x88(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0xc0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x80(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x100(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x78(%RBP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x70(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x110(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x68(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x60(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x80(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x58(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x108(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x50(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x48(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x78(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x40(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x38(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0xb8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x30(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x88(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x28(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x98(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x20(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x58(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x18(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x68(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x70(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RCX,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV (%RCX),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
TEST %RDI,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JE 54b17 <hypre_BoomerAMGBuildMultipass.extracted.27+0x177> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV $0x8,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RDX,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL e870 <hypre_CAlloc@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %R14,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RAX,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
JMP 54b1a <hypre_BoomerAMGBuildMultipass.extracted.27+0x17a> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
XOR %R13D,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV (%RDX),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
TEST %RDI,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JE 54b3d <hypre_BoomerAMGBuildMultipass.extracted.27+0x19d> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV $0x8,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RDX,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL e870 <hypre_CAlloc@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %R14,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RAX,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMPQ $0,(%R15) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JG 54b46 <hypre_BoomerAMGBuildMultipass.extracted.27+0x1a6> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
JMP 54b61 <hypre_BoomerAMGBuildMultipass.extracted.27+0x1c1> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
XOR %R14D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMPQ $0,(%R15) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JLE 54b61 <hypre_BoomerAMGBuildMultipass.extracted.27+0x1c1> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMPQ $0,(%RDX) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JLE 54b80 <hypre_BoomerAMGBuildMultipass.extracted.27+0x1e0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CALL def0 <hypre_GetThreadNum@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %RAX,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL e6c0 <hypre_NumActiveThreads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %RAX,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV -0x30(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x38(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RSI,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
OR %RCX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SHR $0x20,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
JE 54bb8 <hypre_BoomerAMGBuildMultipass.extracted.27+0x218> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RSI,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CQTO | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
IDIV %RCX | 5 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 10 |
JMP 54bbe <hypre_BoomerAMGBuildMultipass.extracted.27+0x21e> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
MOV %ESI,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
DIV %ECX | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
MOV -0x58(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
IMUL %R15,%R11 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
DEC %RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0x1(%R15),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
IMUL %RAX,%RDX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
CMP %RCX,%R15 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMOVE %RSI,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RDX,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %RDX,%R11 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 55137 <hypre_BoomerAMGBuildMultipass.extracted.27+0x797> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x50(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD %RDI,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RAX,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD %RDI,%R11 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x60(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0xf0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV -0x48(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV -0xa8(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV -0x40(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV -0x70(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVDDUP 0x4d7e9(%RIP),%XMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R12,-0xb0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP 54c61 <hypre_BoomerAMGBuildMultipass.extracted.27+0x2c1> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R13,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL e260 <hypre_Free@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %R14,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
ADD $0xf8,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
JMP e260 <hypre_Free@plt> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼hypre_BoomerAMGBuildMultipass.extracted.27– | 0.36 | 0.08 |
○Loop 1512 - par_multi_interp.c:1590-1591 - libparcsr_ls.so | 0.08 | 0.02 |
▼Loop 1500 - par_multi_interp.c:1585-1660 - libparcsr_ls.so– | 0.04 | 0.01 |
○Loop 1508 - par_multi_interp.c:1618-1628 - libparcsr_ls.so | 0.24 | 0.04 |
○Loop 1509 - par_multi_interp.c:1612-1615 - libparcsr_ls.so | 0.01 | 0 |
○Loop 1507 - par_multi_interp.c:1633-1636 - libparcsr_ls.so | 0 | 0 |
○Loop 1506 - par_multi_interp.c:1633-1636 - libparcsr_ls.so | 0 | 0 |
○Loop 1504 - par_multi_interp.c:1657-1658 - libparcsr_ls.so | 0 | 0 |
○Loop 1502 - par_multi_interp.c:1659-1660 - libparcsr_ls.so | 0 | 0 |
○Loop 1501 - par_multi_interp.c:1659-1660 - libparcsr_ls.so | 0 | 0 |
○Loop 1510 - par_multi_interp.c:1612-1615 - libparcsr_ls.so | 0 | 0 |
○Loop 1503 - par_multi_interp.c:1657-1658 - libparcsr_ls.so | 0 | 0 |
○Loop 1505 - par_multi_interp.c:1622-1652 - libparcsr_ls.so | 0 | 0 |
○Loop 1511 - par_multi_interp.c:1592-1593 - libparcsr_ls.so | 0 | 0 |