Function: hypre_BoomerAMGBuildMultipass.extracted.27 | Module: exec | Source: par_multi_interp.c:1575-1663 [...] | Coverage: 0.57% |
---|
Function: hypre_BoomerAMGBuildMultipass.extracted.27 | Module: exec | Source: par_multi_interp.c:1575-1663 [...] | Coverage: 0.57% |
---|
/home/kcamus/qaas_runs/169-443-9681/intel/AMG/build/AMG/AMG/parcsr_ls/par_multi_interp.c: 1575 - 1663 |
-------------------------------------------------------------------------------- |
1575: #pragma omp parallel private(thread_start,thread_stop,my_thread_num,num_threads,k,k1,i,i1,j,j1,sum_C,sum_N,j_start,j_end,cnt,tmp_marker,tmp_marker_offd,cnt_offd,diagonal,alfa) |
[...] |
1585: if (n_fine) |
1586: { tmp_marker = hypre_CTAlloc(HYPRE_Int,n_fine); } |
1587: tmp_marker_offd = NULL; |
1588: if (num_cols_offd) |
1589: { tmp_marker_offd = hypre_CTAlloc(HYPRE_Int,num_cols_offd); } |
1590: for (i=0; i < n_fine; i++) |
1591: { tmp_marker[i] = -1; } |
1592: for (i=0; i < num_cols_offd; i++) |
1593: { tmp_marker_offd[i] = -1; } |
1594: |
1595: /* Compute this thread's range of pass_length */ |
1596: my_thread_num = hypre_GetThreadNum(); |
1597: num_threads = hypre_NumActiveThreads(); |
1598: thread_start = pass_pointer[1] + (pass_length/num_threads)*my_thread_num; |
1599: if (my_thread_num == num_threads-1) |
[...] |
1605: for (i=thread_start; i < thread_stop; i++) |
1606: { |
1607: i1 = pass_array[i]; |
1608: sum_C = 0; |
1609: sum_N = 0; |
1610: j_start = P_diag_start[i1]; |
1611: j_end = j_start+P_diag_i[i1+1]-P_diag_i[i1]; |
1612: for (j=j_start; j < j_end; j++) |
1613: { |
1614: k1 = P_diag_pass[1][j]; |
1615: tmp_marker[C_array[k1]] = i1; |
1616: } |
1617: cnt = P_diag_i[i1]; |
1618: for (j=A_diag_i[i1]+1; j < A_diag_i[i1+1]; j++) |
1619: { |
1620: j1 = A_diag_j[j]; |
1621: if (CF_marker[j1] != -3 && |
1622: (num_functions == 1 || dof_func[i1] == dof_func[j1])) |
1623: sum_N += A_diag_data[j]; |
1624: if (j1 != -1 && tmp_marker[j1] == i1) |
1625: { |
1626: P_diag_data[cnt] = A_diag_data[j]; |
1627: P_diag_j[cnt++] = fine_to_coarse[j1]; |
1628: sum_C += A_diag_data[j]; |
1629: } |
1630: } |
1631: j_start = P_offd_start[i1]; |
1632: j_end = j_start+P_offd_i[i1+1]-P_offd_i[i1]; |
1633: for (j=j_start; j < j_end; j++) |
1634: { |
1635: k1 = P_offd_pass[1][j]; |
1636: tmp_marker_offd[C_array_offd[k1]] = i1; |
1637: } |
1638: cnt_offd = P_offd_i[i1]; |
1639: for (j=A_offd_i[i1]; j < A_offd_i[i1+1]; j++) |
1640: { |
1641: if (col_offd_S_to_A) |
1642: j1 = map_A_to_S[A_offd_j[j]]; |
1643: else |
1644: j1 = A_offd_j[j]; |
1645: if (CF_marker_offd[j1] != -3 && |
1646: (num_functions == 1 || dof_func[i1] == dof_func_offd[j1])) |
1647: sum_N += A_offd_data[j]; |
1648: if (j1 != -1 && tmp_marker_offd[j1] == i1) |
1649: { |
1650: P_offd_data[cnt_offd] = A_offd_data[j]; |
1651: P_offd_j[cnt_offd++] = map_S_to_new[j1]; |
1652: sum_C += A_offd_data[j]; |
1653: } |
1654: } |
1655: diagonal = A_diag_data[A_diag_i[i1]]; |
1656: if (sum_C*diagonal) alfa = -sum_N/(sum_C*diagonal); |
1657: for (j=P_diag_i[i1]; j < cnt; j++) |
1658: P_diag_data[j] *= alfa; |
1659: for (j=P_offd_i[i1]; j < cnt_offd; j++) |
1660: P_offd_data[j] *= alfa; |
1661: } |
1662: hypre_TFree(tmp_marker); |
1663: hypre_TFree(tmp_marker_offd); |
0x447bc0 PUSH %RBP |
0x447bc1 MOV %RSP,%RBP |
0x447bc4 PUSH %R15 |
0x447bc6 PUSH %R14 |
0x447bc8 PUSH %R13 |
0x447bca PUSH %R12 |
0x447bcc PUSH %RBX |
0x447bcd SUB $0xe8,%RSP |
0x447bd4 MOV %R9,-0xd0(%RBP) |
0x447bdb MOV %R8,-0x40(%RBP) |
0x447bdf MOV %RCX,-0xb0(%RBP) |
0x447be6 MOV %RDX,-0x50(%RBP) |
0x447bea MOV 0xe8(%RBP),%RAX |
0x447bf1 MOV %RAX,-0x38(%RBP) |
0x447bf5 MOV 0xe0(%RBP),%RAX |
0x447bfc MOV %RAX,-0x48(%RBP) |
0x447c00 MOV 0xd8(%RBP),%RDI |
0x447c07 MOV 0xd0(%RBP),%RAX |
0x447c0e MOV %RAX,-0x110(%RBP) |
0x447c15 MOV 0xc8(%RBP),%RAX |
0x447c1c MOV %RAX,-0x108(%RBP) |
0x447c23 MOV 0xc0(%RBP),%RAX |
0x447c2a MOV %RAX,-0xc8(%RBP) |
0x447c31 MOV 0xb8(%RBP),%RAX |
0x447c38 MOV %RAX,-0xc0(%RBP) |
0x447c3f MOV 0xb0(%RBP),%RAX |
0x447c46 MOV %RAX,-0xe8(%RBP) |
0x447c4d MOV 0xa8(%RBP),%RAX |
0x447c54 MOV %RAX,-0xe0(%RBP) |
0x447c5b MOV 0xa0(%RBP),%RAX |
0x447c62 MOV %RAX,-0x58(%RBP) |
0x447c66 MOV 0x98(%RBP),%RAX |
0x447c6d MOV %RAX,-0xd8(%RBP) |
0x447c74 MOV 0x90(%RBP),%R15 |
0x447c7b MOV 0x88(%RBP),%R12 |
0x447c82 MOV 0x80(%RBP),%RAX |
0x447c89 MOV %RAX,-0xf0(%RBP) |
0x447c90 MOV 0x78(%RBP),%RAX |
0x447c94 MOV %RAX,-0x90(%RBP) |
0x447c9b MOV 0x70(%RBP),%RAX |
0x447c9f MOV %RAX,-0x100(%RBP) |
0x447ca6 MOV 0x68(%RBP),%RAX |
0x447caa MOV %RAX,-0x68(%RBP) |
0x447cae MOV 0x60(%RBP),%RAX |
0x447cb2 MOV %RAX,-0xa0(%RBP) |
0x447cb9 MOV 0x58(%RBP),%RAX |
0x447cbd MOV %RAX,-0xf8(%RBP) |
0x447cc4 MOV 0x50(%RBP),%RAX |
0x447cc8 MOV %RAX,-0x60(%RBP) |
0x447ccc MOV 0x48(%RBP),%RAX |
0x447cd0 MOV %RAX,-0x78(%RBP) |
0x447cd4 MOV 0x40(%RBP),%RCX |
0x447cd8 MOV 0x38(%RBP),%RAX |
0x447cdc MOV %RAX,-0xb8(%RBP) |
0x447ce3 MOV 0x30(%RBP),%RAX |
0x447ce7 MOV %RAX,-0x98(%RBP) |
0x447cee MOV 0x28(%RBP),%RAX |
0x447cf2 MOV %RAX,-0xa8(%RBP) |
0x447cf9 MOV 0x20(%RBP),%RAX |
0x447cfd MOV %RAX,-0x88(%RBP) |
0x447d04 MOV 0x18(%RBP),%RAX |
0x447d08 MOV %RAX,-0x80(%RBP) |
0x447d0c MOV 0x10(%RBP),%RAX |
0x447d10 MOV %RAX,-0x70(%RBP) |
0x447d14 TEST %RDI,%RDI |
0x447d17 MOV %RCX,-0x30(%RBP) |
0x447d1b JE 447dc0 |
0x447d21 MOV $0x8,%ESI |
0x447d26 MOV %RDI,%RBX |
0x447d29 CALL 4f3da0 <hypre_CAlloc> |
0x447d2e MOV -0x30(%RBP),%RCX |
0x447d32 MOV %RAX,%R13 |
0x447d35 TEST %RCX,%RCX |
0x447d38 JE 447dcf |
0x447d3e MOV $0x8,%ESI |
0x447d43 MOV %RCX,%RDI |
0x447d46 CALL 4f3da0 <hypre_CAlloc> |
0x447d4b MOV %RAX,%R14 |
0x447d4e TEST %RBX,%RBX |
0x447d51 JLE 447d67 |
0x447d53 SAL $0x3,%RBX |
0x447d57 MOV %R13,%RDI |
0x447d5a MOV $0xff,%ESI |
0x447d5f MOV %RBX,%RDX |
0x447d62 CALL 4fe630 <__intel_avx_rep_memset> |
0x447d67 MOV -0x30(%RBP),%RDX |
0x447d6b TEST %RDX,%RDX |
0x447d6e JLE 447d81 |
0x447d70 SAL $0x3,%RDX |
0x447d74 MOV %R14,%RDI |
0x447d77 MOV $0xff,%ESI |
0x447d7c CALL 4fe630 <__intel_avx_rep_memset> |
0x447d81 CALL 4f6100 <hypre_GetThreadNum> |
0x447d86 MOV %RAX,-0x30(%RBP) |
0x447d8a CALL 4f60f0 <hypre_NumActiveThreads> |
0x447d8f MOV %RAX,%RCX |
0x447d92 MOV -0x58(%RBP),%RAX |
0x447d96 MOV 0x8(%RAX),%R10 |
0x447d9a MOV -0x38(%RBP),%RBX |
0x447d9e MOV %RBX,%RAX |
0x447da1 OR %RCX,%RAX |
0x447da4 SHR $0x20,%RAX |
0x447da8 JE 447de0 |
0x447daa MOV %RBX,%RAX |
0x447dad CQTO |
0x447daf IDIV %RCX |
0x447db2 JMP 447de6 |
0x447db4 NOPW %CS:(%RAX,%RAX,1) |
0x447dc0 MOV %RDI,%RBX |
0x447dc3 XOR %R13D,%R13D |
0x447dc6 TEST %RCX,%RCX |
0x447dc9 JNE 447d3e |
0x447dcf XOR %R14D,%R14D |
0x447dd2 TEST %RBX,%RBX |
0x447dd5 JG 447d53 |
0x447ddb JMP 447d67 |
0x447ddd NOPL (%RAX) |
0x447de0 MOV %EBX,%EAX |
0x447de2 XOR %EDX,%EDX |
0x447de4 DIV %ECX |
0x447de6 MOV -0x40(%RBP),%R11 |
0x447dea MOV -0x50(%RBP),%R8 |
0x447dee MOV -0x48(%RBP),%R9 |
0x447df2 MOV %RAX,%RDX |
0x447df5 MOV -0x30(%RBP),%RSI |
0x447df9 IMUL %RSI,%RDX |
0x447dfd DEC %RCX |
0x447e00 LEA 0x1(%RSI),%RDI |
0x447e04 IMUL %RAX,%RDI |
0x447e08 CMP %RCX,%RSI |
0x447e0b CMOVE %RBX,%RDI |
0x447e0f MOV %RDI,-0x38(%RBP) |
0x447e13 CMP %RDI,%RDX |
0x447e16 JGE 448340 |
0x447e1c ADD %R10,-0x38(%RBP) |
0x447e20 ADD %R10,%RDX |
0x447e23 VMOVDDUP 0xb8cd5(%RIP),%XMM0 |
0x447e2b VXORPD %XMM1,%XMM1,%XMM1 |
0x447e2f JMP 447e59 |
0x447e31 NOPW %CS:(%RAX,%RAX,1) |
(852) 0x447e40 MOV -0x58(%RBP),%RDX |
(852) 0x447e44 INC %RDX |
(852) 0x447e47 CMP -0x38(%RBP),%RDX |
(852) 0x447e4b MOV -0x50(%RBP),%R8 |
(852) 0x447e4f MOV -0x48(%RBP),%R9 |
(852) 0x447e53 JGE 448340 |
(852) 0x447e59 MOV -0xd8(%RBP),%RAX |
(852) 0x447e60 MOV %RDX,-0x58(%RBP) |
(852) 0x447e64 MOV (%RAX,%RDX,8),%RAX |
(852) 0x447e68 MOV -0xe0(%RBP),%RCX |
(852) 0x447e6f MOV (%RCX,%RAX,8),%RDX |
(852) 0x447e73 MOV -0x60(%RBP),%RCX |
(852) 0x447e77 MOV (%RCX,%RAX,8),%RSI |
(852) 0x447e7b MOV 0x8(%RCX,%RAX,8),%RCX |
(852) 0x447e80 LEA (%RCX,%RDX,1),%RDI |
(852) 0x447e84 SUB %RSI,%RDI |
(852) 0x447e87 CMP %RDI,%RDX |
(852) 0x447e8a JGE 447f55 |
(852) 0x447e90 MOV -0xc0(%RBP),%RDI |
(852) 0x447e97 MOV 0x8(%RDI),%R10 |
(852) 0x447e9b SUB %RSI,%RCX |
(852) 0x447e9e CMP $0x8,%RCX |
(852) 0x447ea2 JB 447f30 |
(852) 0x447ea8 MOV %RCX,%RDI |
(852) 0x447eab SHR $0x3,%RDI |
(852) 0x447eaf LEA (%R10,%RDX,8),%RSI |
(852) 0x447eb3 ADD $0x38,%RSI |
(852) 0x447eb7 NOPW (%RAX,%RAX,1) |
(862) 0x447ec0 MOV -0x38(%RSI),%RBX |
(862) 0x447ec4 MOV (%R12,%RBX,8),%RBX |
(862) 0x447ec8 MOV %RAX,(%R13,%RBX,8) |
(862) 0x447ecd MOV -0x30(%RSI),%RBX |
(862) 0x447ed1 MOV (%R12,%RBX,8),%RBX |
(862) 0x447ed5 MOV %RAX,(%R13,%RBX,8) |
(862) 0x447eda MOV -0x28(%RSI),%RBX |
(862) 0x447ede MOV (%R12,%RBX,8),%RBX |
(862) 0x447ee2 MOV %RAX,(%R13,%RBX,8) |
(862) 0x447ee7 MOV -0x20(%RSI),%RBX |
(862) 0x447eeb MOV (%R12,%RBX,8),%RBX |
(862) 0x447eef MOV %RAX,(%R13,%RBX,8) |
(862) 0x447ef4 MOV -0x18(%RSI),%RBX |
(862) 0x447ef8 MOV (%R12,%RBX,8),%RBX |
(862) 0x447efc MOV %RAX,(%R13,%RBX,8) |
(862) 0x447f01 MOV -0x10(%RSI),%RBX |
(862) 0x447f05 MOV (%R12,%RBX,8),%RBX |
(862) 0x447f09 MOV %RAX,(%R13,%RBX,8) |
(862) 0x447f0e MOV -0x8(%RSI),%RBX |
(862) 0x447f12 MOV (%R12,%RBX,8),%RBX |
(862) 0x447f16 MOV %RAX,(%R13,%RBX,8) |
(862) 0x447f1b MOV (%RSI),%RBX |
(862) 0x447f1e MOV (%R12,%RBX,8),%RBX |
(862) 0x447f22 MOV %RAX,(%R13,%RBX,8) |
(862) 0x447f27 ADD $0x40,%RSI |
(862) 0x447f2b DEC %RDI |
(862) 0x447f2e JNE 447ec0 |
(852) 0x447f30 MOV %RCX,%RSI |
(852) 0x447f33 AND $-0x8,%RSI |
(852) 0x447f37 CMP %RCX,%RSI |
(852) 0x447f3a JAE 447f55 |
(852) 0x447f3c LEA (%R10,%RDX,8),%RDX |
(861) 0x447f40 MOV (%RDX,%RSI,8),%RDI |
(861) 0x447f44 MOV (%R12,%RDI,8),%RDI |
(861) 0x447f48 MOV %RAX,(%R13,%RDI,8) |
(861) 0x447f4d INC %RSI |
(861) 0x447f50 CMP %RSI,%RCX |
(861) 0x447f53 JNE 447f40 |
(852) 0x447f55 MOV -0x60(%RBP),%RCX |
(852) 0x447f59 MOV (%RCX,%RAX,8),%RSI |
(852) 0x447f5d MOV -0x80(%RBP),%RDX |
(852) 0x447f61 MOV (%RDX,%RAX,8),%RCX |
(852) 0x447f65 MOV 0x8(%RDX,%RAX,8),%RDX |
(852) 0x447f6a INC %RCX |
(852) 0x447f6d VXORPD %XMM4,%XMM4,%XMM4 |
(852) 0x447f71 CMP %RDX,%RCX |
(852) 0x447f74 MOV %RSI,-0x30(%RBP) |
(852) 0x447f78 VXORPD %XMM3,%XMM3,%XMM3 |
(852) 0x447f7c JGE 448020 |
(852) 0x447f82 VXORPD %XMM4,%XMM4,%XMM4 |
(852) 0x447f86 MOV -0x88(%RBP),%RDI |
(852) 0x447f8d MOV -0x70(%RBP),%R10 |
(852) 0x447f91 JMP 447fa8 |
0x447f93 NOPW %CS:(%RAX,%RAX,1) |
(860) 0x447fa0 INC %RCX |
(860) 0x447fa3 CMP %RDX,%RCX |
(860) 0x447fa6 JGE 448024 |
(860) 0x447fa8 MOV (%RDI,%RCX,8),%RSI |
(860) 0x447fac CMPQ $-0x3,(%R8,%RSI,8) |
(860) 0x447fb1 JE 447fd4 |
(860) 0x447fb3 CMPQ $0x1,-0xb0(%RBP) |
(860) 0x447fbb JE 447fce |
(860) 0x447fbd MOV (%R11,%RAX,8),%RDI |
(860) 0x447fc1 CMP (%R11,%RSI,8),%RDI |
(860) 0x447fc5 MOV -0x88(%RBP),%RDI |
(860) 0x447fcc JNE 447fd4 |
(860) 0x447fce VADDSD (%R10,%RCX,8),%XMM3,%XMM3 |
(860) 0x447fd4 CMP $-0x1,%RSI |
(860) 0x447fd8 JE 447fa0 |
(860) 0x447fda CMP %RAX,(%R13,%RSI,8) |
(860) 0x447fdf JNE 447fa0 |
(860) 0x447fe1 VMOVSD (%R10,%RCX,8),%XMM5 |
(860) 0x447fe7 MOV -0x78(%RBP),%RDX |
(860) 0x447feb MOV -0x30(%RBP),%RBX |
(860) 0x447fef VMOVSD %XMM5,(%RDX,%RBX,8) |
(860) 0x447ff4 MOV (%R9,%RSI,8),%RDX |
(860) 0x447ff8 MOV -0xf8(%RBP),%RSI |
(860) 0x447fff MOV %RDX,(%RSI,%RBX,8) |
(860) 0x448003 INC %RBX |
(860) 0x448006 MOV %RBX,-0x30(%RBP) |
(860) 0x44800a VADDSD (%R10,%RCX,8),%XMM4,%XMM4 |
(860) 0x448010 MOV -0x80(%RBP),%RDX |
(860) 0x448014 MOV 0x8(%RDX,%RAX,8),%RDX |
(860) 0x448019 JMP 447fa0 |
0x44801b NOPL (%RAX,%RAX,1) |
(852) 0x448020 MOV -0x70(%RBP),%R10 |
(852) 0x448024 MOV -0xe8(%RBP),%RCX |
(852) 0x44802b MOV (%RCX,%RAX,8),%RDX |
(852) 0x44802f MOV -0x68(%RBP),%RCX |
(852) 0x448033 MOV (%RCX,%RAX,8),%RSI |
(852) 0x448037 MOV 0x8(%RCX,%RAX,8),%RCX |
(852) 0x44803c LEA (%RCX,%RDX,1),%RDI |
(852) 0x448040 SUB %RSI,%RDI |
(852) 0x448043 CMP %RDI,%RDX |
(852) 0x448046 JGE 448104 |
(852) 0x44804c MOV -0xc8(%RBP),%RDI |
(852) 0x448053 MOV 0x8(%RDI),%R11 |
(852) 0x448057 SUB %RSI,%RCX |
(852) 0x44805a CMP $0x8,%RCX |
(852) 0x44805e JB 4480d8 |
(852) 0x448060 MOV %RCX,%RDI |
(852) 0x448063 SHR $0x3,%RDI |
(852) 0x448067 LEA (%R11,%RDX,8),%RSI |
(852) 0x44806b ADD $0x38,%RSI |
(852) 0x44806f NOP |
(859) 0x448070 MOV -0x38(%RSI),%RBX |
(859) 0x448074 MOV (%R15,%RBX,8),%RBX |
(859) 0x448078 MOV %RAX,(%R14,%RBX,8) |
(859) 0x44807c MOV -0x30(%RSI),%RBX |
(859) 0x448080 MOV (%R15,%RBX,8),%RBX |
(859) 0x448084 MOV %RAX,(%R14,%RBX,8) |
(859) 0x448088 MOV -0x28(%RSI),%RBX |
(859) 0x44808c MOV (%R15,%RBX,8),%RBX |
(859) 0x448090 MOV %RAX,(%R14,%RBX,8) |
(859) 0x448094 MOV -0x20(%RSI),%RBX |
(859) 0x448098 MOV (%R15,%RBX,8),%RBX |
(859) 0x44809c MOV %RAX,(%R14,%RBX,8) |
(859) 0x4480a0 MOV -0x18(%RSI),%RBX |
(859) 0x4480a4 MOV (%R15,%RBX,8),%RBX |
(859) 0x4480a8 MOV %RAX,(%R14,%RBX,8) |
(859) 0x4480ac MOV -0x10(%RSI),%RBX |
(859) 0x4480b0 MOV (%R15,%RBX,8),%RBX |
(859) 0x4480b4 MOV %RAX,(%R14,%RBX,8) |
(859) 0x4480b8 MOV -0x8(%RSI),%RBX |
(859) 0x4480bc MOV (%R15,%RBX,8),%RBX |
(859) 0x4480c0 MOV %RAX,(%R14,%RBX,8) |
(859) 0x4480c4 MOV (%RSI),%RBX |
(859) 0x4480c7 MOV (%R15,%RBX,8),%RBX |
(859) 0x4480cb MOV %RAX,(%R14,%RBX,8) |
(859) 0x4480cf ADD $0x40,%RSI |
(859) 0x4480d3 DEC %RDI |
(859) 0x4480d6 JNE 448070 |
(852) 0x4480d8 MOV %RCX,%RSI |
(852) 0x4480db AND $-0x8,%RSI |
(852) 0x4480df CMP %RCX,%RSI |
(852) 0x4480e2 JAE 448104 |
(852) 0x4480e4 LEA (%R11,%RDX,8),%RDX |
(852) 0x4480e8 NOPL (%RAX,%RAX,1) |
(858) 0x4480f0 MOV (%RDX,%RSI,8),%RDI |
(858) 0x4480f4 MOV (%R15,%RDI,8),%RDI |
(858) 0x4480f8 MOV %RAX,(%R14,%RDI,8) |
(858) 0x4480fc INC %RSI |
(858) 0x4480ff CMP %RSI,%RCX |
(858) 0x448102 JNE 4480f0 |
(852) 0x448104 MOV -0x68(%RBP),%RCX |
(852) 0x448108 MOV (%RCX,%RAX,8),%R9 |
(852) 0x44810c MOV -0x98(%RBP),%RCX |
(852) 0x448113 MOV (%RCX,%RAX,8),%RSI |
(852) 0x448117 MOV 0x8(%RCX,%RAX,8),%RCX |
(852) 0x44811c CMP %RCX,%RSI |
(852) 0x44811f JGE 448200 |
(852) 0x448125 MOV -0xb8(%RBP),%RDX |
(852) 0x44812c LEA (%RDX,%RSI,8),%RDI |
(852) 0x448130 MOV -0xd0(%RBP),%R8 |
(852) 0x448137 MOV -0x90(%RBP),%R11 |
(852) 0x44813e JMP 448150 |
(857) 0x448140 INC %RSI |
(857) 0x448143 ADD $0x8,%RDI |
(857) 0x448147 CMP %RCX,%RSI |
(857) 0x44814a JGE 448200 |
(857) 0x448150 MOV %RDI,%RDX |
(857) 0x448153 TEST %R8,%R8 |
(857) 0x448156 JE 448166 |
(857) 0x448158 MOV (%RDI),%RDX |
(857) 0x44815b MOV -0x110(%RBP),%RBX |
(857) 0x448162 LEA (%RBX,%RDX,8),%RDX |
(857) 0x448166 MOV (%RDX),%RBX |
(857) 0x448169 CMPQ $-0x3,(%R11,%RBX,8) |
(857) 0x44816e JE 4481a2 |
(857) 0x448170 CMPQ $0x1,-0xb0(%RBP) |
(857) 0x448178 JE 448196 |
(857) 0x44817a MOV -0x40(%RBP),%RDX |
(857) 0x44817e MOV (%RDX,%RAX,8),%RDX |
(857) 0x448182 MOV -0xf0(%RBP),%R11 |
(857) 0x448189 CMP (%R11,%RBX,8),%RDX |
(857) 0x44818d MOV -0x90(%RBP),%R11 |
(857) 0x448194 JNE 4481a2 |
(857) 0x448196 MOV -0xa8(%RBP),%RDX |
(857) 0x44819d VADDSD (%RDX,%RSI,8),%XMM3,%XMM3 |
(857) 0x4481a2 CMP $-0x1,%RBX |
(857) 0x4481a6 JE 448140 |
(857) 0x4481a8 CMP %RAX,(%R14,%RBX,8) |
(857) 0x4481ac JNE 448140 |
(857) 0x4481ae MOV -0xa8(%RBP),%R10 |
(857) 0x4481b5 VMOVSD (%R10,%RSI,8),%XMM5 |
(857) 0x4481bb MOV -0xa0(%RBP),%RCX |
(857) 0x4481c2 VMOVSD %XMM5,(%RCX,%R9,8) |
(857) 0x4481c8 MOV -0x108(%RBP),%RCX |
(857) 0x4481cf MOV (%RCX,%RBX,8),%RCX |
(857) 0x4481d3 MOV -0x100(%RBP),%RDX |
(857) 0x4481da MOV %RCX,(%RDX,%R9,8) |
(857) 0x4481de INC %R9 |
(857) 0x4481e1 VADDSD (%R10,%RSI,8),%XMM4,%XMM4 |
(857) 0x4481e7 MOV -0x70(%RBP),%R10 |
(857) 0x4481eb MOV -0x98(%RBP),%RCX |
(857) 0x4481f2 MOV 0x8(%RCX,%RAX,8),%RCX |
(857) 0x4481f7 JMP 448140 |
0x4481fc NOPL (%RAX) |
(852) 0x448200 MOV -0x80(%RBP),%RCX |
(852) 0x448204 MOV (%RCX,%RAX,8),%RCX |
(852) 0x448208 VMULSD (%R10,%RCX,8),%XMM4,%XMM4 |
(852) 0x44820e VUCOMISD %XMM1,%XMM4 |
(852) 0x448212 JE 44821c |
(852) 0x448214 VXORPD %XMM0,%XMM3,%XMM2 |
(852) 0x448218 VDIVSD %XMM4,%XMM2,%XMM2 |
(852) 0x44821c MOV -0x60(%RBP),%RCX |
(852) 0x448220 MOV (%RCX,%RAX,8),%R11 |
(852) 0x448224 MOV -0x30(%RBP),%RSI |
(852) 0x448228 MOV %RSI,%R10 |
(852) 0x44822b SUB %R11,%R10 |
(852) 0x44822e MOV -0xa0(%RBP),%RDI |
(852) 0x448235 MOV -0x78(%RBP),%RDX |
(852) 0x448239 JLE 4482a2 |
(852) 0x44823b MOV %R10,%RCX |
(852) 0x44823e AND $-0x4,%RCX |
(852) 0x448242 JE 448280 |
(852) 0x448244 LEA -0x1(%RCX),%RBX |
(852) 0x448248 VBROADCASTSD %XMM2,%YMM3 |
(852) 0x44824d LEA (%RDX,%R11,8),%RDX |
(852) 0x448251 XOR %R8D,%R8D |
(852) 0x448254 NOPW %CS:(%RAX,%RAX,1) |
(856) 0x448260 VMULPD (%RDX,%R8,8),%YMM3,%YMM4 |
(856) 0x448266 VMOVUPD %YMM4,(%RDX,%R8,8) |
(856) 0x44826c ADD $0x4,%R8 |
(856) 0x448270 CMP %RBX,%R8 |
(856) 0x448273 JBE 448260 |
(852) 0x448275 CMP %RCX,%R10 |
(852) 0x448278 MOV -0x78(%RBP),%RDX |
(852) 0x44827c JNE 448282 |
(852) 0x44827e JMP 4482a2 |
(852) 0x448280 XOR %ECX,%ECX |
(852) 0x448282 ADD %R11,%RCX |
(852) 0x448285 NOPW %CS:(%RAX,%RAX,1) |
(855) 0x448290 VMULSD (%RDX,%RCX,8),%XMM2,%XMM3 |
(855) 0x448295 VMOVSD %XMM3,(%RDX,%RCX,8) |
(855) 0x44829a INC %RCX |
(855) 0x44829d CMP %RCX,%RSI |
(855) 0x4482a0 JNE 448290 |
(852) 0x4482a2 MOV -0x68(%RBP),%RCX |
(852) 0x4482a6 MOV (%RCX,%RAX,8),%R8 |
(852) 0x4482aa MOV %R9,%RDX |
(852) 0x4482ad SUB %R8,%RDX |
(852) 0x4482b0 MOV -0x40(%RBP),%R11 |
(852) 0x4482b4 JLE 447e40 |
(852) 0x4482ba MOV %RDX,%RAX |
(852) 0x4482bd AND $-0x4,%RAX |
(852) 0x4482c1 JE 448300 |
(852) 0x4482c3 LEA -0x1(%RAX),%RSI |
(852) 0x4482c7 VBROADCASTSD %XMM2,%YMM3 |
(852) 0x4482cc LEA (%RDI,%R8,8),%RBX |
(852) 0x4482d0 XOR %ECX,%ECX |
(852) 0x4482d2 NOPW %CS:(%RAX,%RAX,1) |
(854) 0x4482e0 VMULPD (%RBX,%RCX,8),%YMM3,%YMM4 |
(854) 0x4482e5 VMOVUPD %YMM4,(%RBX,%RCX,8) |
(854) 0x4482ea ADD $0x4,%RCX |
(854) 0x4482ee CMP %RSI,%RCX |
(854) 0x4482f1 JBE 4482e0 |
(852) 0x4482f3 CMP %RAX,%RDX |
(852) 0x4482f6 JE 447e40 |
(852) 0x4482fc JMP 448302 |
0x4482fe XCHG %AX,%AX |
(852) 0x448300 XOR %EAX,%EAX |
(852) 0x448302 ADD %R8,%RAX |
(852) 0x448305 NOPW %CS:(%RAX,%RAX,1) |
(853) 0x448310 VMULSD (%RDI,%RAX,8),%XMM2,%XMM3 |
(853) 0x448315 VMOVSD %XMM3,(%RDI,%RAX,8) |
(853) 0x44831a INC %RAX |
(853) 0x44831d CMP %RAX,%R9 |
(853) 0x448320 JNE 448310 |
(852) 0x448322 JMP 447e40 |
0x448327 NOPW %CS:(%RAX,%RAX,1) |
0x448336 NOPW %CS:(%RAX,%RAX,1) |
0x448340 MOV %R13,%RDI |
0x448343 VZEROUPPER |
0x448346 CALL 4f3eb0 <hypre_Free> |
0x44834b MOV %R14,%RDI |
0x44834e ADD $0xe8,%RSP |
0x448355 POP %RBX |
0x448356 POP %R12 |
0x448358 POP %R13 |
0x44835a POP %R14 |
0x44835c POP %R15 |
0x44835e POP %RBP |
0x44835f JMP 4f3eb0 |
0x448364 NOPW %CS:(%RAX,%RAX,1) |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►100.00+ | __kmp_invoke_microtask | libiomp5.so | |
○ | __kmp_fork_call | libiomp5.so | |
○ | __kmpc_fork_call | libiomp5.so | |
○ | hypre_BoomerAMGBuildMultipass | par_multi_interp.c:1575 | exec |
○ | hypre_BoomerAMGSetup | par_amg_setup.c:737 | exec |
○ | hypre_PCGSetup | pcg.c:234 | exec |
○ | main | amg.c:398 | exec |
○ | __libc_init_first | libc.so.6 |
Path / |
Source file and lines | par_multi_interp.c:1575-1663 |
Module | exec |
nb instructions | 159 |
nb uops | 235 |
loop length | 737 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 2 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 57 |
micro-operation queue | 58.75 cycles |
front end | 58.75 cycles |
 | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 |
---|---|---|---|---|---|---|---|---|
uops | 28.00 | 28.00 | 30.17 | 29.83 | 45.00 | 28.00 | 28.00 | 30.00 |
cycles | 28.00 | 28.00 | 30.17 | 29.83 | 45.00 | 28.00 | 28.00 | 30.00 |
Cycles executing div or sqrt instructions | 30.00-96.00 |
FE+BE cycles | 42.51-96.63 |
Stall cycles | 1.02-55.15 |
SB full (events) | 1.03-9.00 |
Front-end | 58.75 |
Dispatch | 45.00 |
DIV/SQRT | 30.00-96.00 |
Overall L1 | 58.75-96.00 |
all | 1% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 3% |
all | 50% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 100% |
all | 2% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 6% |
all | 12% |
load | 12% |
store | 12% |
mul | 12% |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 18% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 25% |
all | 12% |
load | 12% |
store | 12% |
mul | 12% |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 9% |
other | 12% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
PUSH %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
SUB $0xe8,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %R9,-0xd0(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %R8,-0x40(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RCX,-0xb0(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RDX,-0x50(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0xe8(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x38(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0xe0(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x48(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0xd8(%RBP),%RDI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0xd0(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x110(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0xc8(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x108(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0xc0(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0xc8(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0xb8(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0xc0(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0xb0(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0xe8(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0xa8(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0xe0(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0xa0(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x58(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x98(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0xd8(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x90(%RBP),%R15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x88(%RBP),%R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x80(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0xf0(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x78(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x90(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x70(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x100(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x68(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x68(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x60(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0xa0(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x58(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0xf8(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x50(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x60(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x48(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x78(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x40(%RBP),%RCX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x38(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0xb8(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x30(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x98(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x28(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0xa8(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x20(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x88(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x18(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x80(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x10(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x70(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
TEST %RDI,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %RCX,-0x30(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
JE 447dc0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV $0x8,%ESI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %RDI,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CALL 4f3da0 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
MOV -0x30(%RBP),%RCX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
TEST %RCX,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 447dcf | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV $0x8,%ESI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %RCX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CALL 4f3da0 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
MOV %RAX,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
TEST %RBX,%RBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JLE 447d67 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
SAL $0x3,%RBX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
MOV %R13,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV $0xff,%ESI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %RBX,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CALL 4fe630 <__intel_avx_rep_memset> | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
MOV -0x30(%RBP),%RDX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
TEST %RDX,%RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JLE 447d81 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
SAL $0x3,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
MOV %R14,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV $0xff,%ESI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CALL 4fe630 <__intel_avx_rep_memset> | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
CALL 4f6100 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
MOV %RAX,-0x30(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
CALL 4f60f0 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
MOV %RAX,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV -0x58(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x8(%RAX),%R10 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV -0x38(%RBP),%RBX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RBX,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
OR %RCX,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
SHR $0x20,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
JE 447de0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV %RBX,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CQTO | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
IDIV %RCX | 57 | 14.25 | 14.25 | 0 | 0 | 0 | 14.25 | 14.25 | 0 | 42-95 | 24-90 |
JMP 447de6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %RDI,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XOR %R13D,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
TEST %RCX,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JNE 447d3e | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
XOR %R14D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
TEST %RBX,%RBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JG 447d53 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
JMP 447d67 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %EBX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
DIV %ECX | 10 | 2.50 | 2.50 | 0 | 0 | 0 | 2.50 | 2.50 | 0 | 26 | 6 |
MOV -0x40(%RBP),%R11 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV -0x50(%RBP),%R8 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV -0x48(%RBP),%R9 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV -0x30(%RBP),%RSI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
IMUL %RSI,%RDX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
DEC %RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
LEA 0x1(%RSI),%RDI | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
IMUL %RAX,%RDI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
CMP %RCX,%RSI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CMOVE %RBX,%RDI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
MOV %RDI,-0x38(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
CMP %RDI,%RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JGE 448340 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
ADD %R10,-0x38(%RBP) | 2 | 0.25 | 0.25 | 0.83 | 0.83 | 1 | 0.25 | 0.25 | 0.33 | 5 | 1 |
ADD %R10,%RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVDDUP 0xb8cd5(%RIP),%XMM0 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VXORPD %XMM1,%XMM1,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 447e59 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %R13,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VZEROUPPER | 4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 4f3eb0 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
MOV %R14,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
ADD $0xe8,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R13 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %RBP | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
JMP 4f3eb0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
Source file and lines | par_multi_interp.c:1575-1663 |
Module | exec |
nb instructions | 159 |
nb uops | 235 |
loop length | 737 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 2 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 57 |
micro-operation queue | 58.75 cycles |
front end | 58.75 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | |
---|---|---|---|---|---|---|---|---|
uops | 28.00 | 28.00 | 30.17 | 29.83 | 45.00 | 28.00 | 28.00 | 30.00 |
cycles | 28.00 | 28.00 | 30.17 | 29.83 | 45.00 | 28.00 | 28.00 | 30.00 |
Cycles executing div or sqrt instructions | 30.00-96.00 |
FE+BE cycles | 42.51-96.63 |
Stall cycles | 1.02-55.15 |
SB full (events) | 1.03-9.00 |
Front-end | 58.75 |
Dispatch | 45.00 |
DIV/SQRT | 30.00-96.00 |
Overall L1 | 58.75-96.00 |
all | 1% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 3% |
all | 50% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 100% |
all | 2% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 6% |
all | 12% |
load | 12% |
store | 12% |
mul | 12% |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 18% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 25% |
all | 12% |
load | 12% |
store | 12% |
mul | 12% |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 9% |
other | 12% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
PUSH %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
SUB $0xe8,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %R9,-0xd0(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %R8,-0x40(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RCX,-0xb0(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RDX,-0x50(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0xe8(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x38(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0xe0(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x48(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0xd8(%RBP),%RDI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0xd0(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x110(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0xc8(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x108(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0xc0(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0xc8(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0xb8(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0xc0(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0xb0(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0xe8(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0xa8(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0xe0(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0xa0(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x58(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x98(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0xd8(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x90(%RBP),%R15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x88(%RBP),%R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x80(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0xf0(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x78(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x90(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x70(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x100(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x68(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x68(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x60(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0xa0(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x58(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0xf8(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x50(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x60(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x48(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x78(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x40(%RBP),%RCX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x38(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0xb8(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x30(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x98(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x28(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0xa8(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x20(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x88(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x18(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x80(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x10(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x70(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
TEST %RDI,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %RCX,-0x30(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
JE 447dc0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV $0x8,%ESI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %RDI,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CALL 4f3da0 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
MOV -0x30(%RBP),%RCX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
TEST %RCX,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 447dcf | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV $0x8,%ESI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %RCX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CALL 4f3da0 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
MOV %RAX,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
TEST %RBX,%RBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JLE 447d67 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
SAL $0x3,%RBX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
MOV %R13,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV $0xff,%ESI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %RBX,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CALL 4fe630 <__intel_avx_rep_memset> | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
MOV -0x30(%RBP),%RDX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
TEST %RDX,%RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JLE 447d81 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
SAL $0x3,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
MOV %R14,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV $0xff,%ESI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CALL 4fe630 <__intel_avx_rep_memset> | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
CALL 4f6100 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
MOV %RAX,-0x30(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
CALL 4f60f0 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
MOV %RAX,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV -0x58(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x8(%RAX),%R10 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV -0x38(%RBP),%RBX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RBX,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
OR %RCX,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
SHR $0x20,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
JE 447de0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV %RBX,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CQTO | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
IDIV %RCX | 57 | 14.25 | 14.25 | 0 | 0 | 0 | 14.25 | 14.25 | 0 | 42-95 | 24-90 |
JMP 447de6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %RDI,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XOR %R13D,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
TEST %RCX,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JNE 447d3e | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
XOR %R14D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
TEST %RBX,%RBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JG 447d53 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
JMP 447d67 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %EBX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
DIV %ECX | 10 | 2.50 | 2.50 | 0 | 0 | 0 | 2.50 | 2.50 | 0 | 26 | 6 |
MOV -0x40(%RBP),%R11 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV -0x50(%RBP),%R8 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV -0x48(%RBP),%R9 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV -0x30(%RBP),%RSI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
IMUL %RSI,%RDX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
DEC %RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
LEA 0x1(%RSI),%RDI | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
IMUL %RAX,%RDI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
CMP %RCX,%RSI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CMOVE %RBX,%RDI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
MOV %RDI,-0x38(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
CMP %RDI,%RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JGE 448340 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
ADD %R10,-0x38(%RBP) | 2 | 0.25 | 0.25 | 0.83 | 0.83 | 1 | 0.25 | 0.25 | 0.33 | 5 | 1 |
ADD %R10,%RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVDDUP 0xb8cd5(%RIP),%XMM0 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VXORPD %XMM1,%XMM1,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 447e59 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %R13,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VZEROUPPER | 4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 4f3eb0 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
MOV %R14,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
ADD $0xe8,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R13 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %RBP | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
JMP 4f3eb0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼hypre_BoomerAMGBuildMultipass.extracted.27– | 0.57 | 0.21 |
▼Loop 852 - par_multi_interp.c:1585-1660 - exec– | 0.16 | 0.06 |
○Loop 860 - par_multi_interp.c:1618-1628 - exec | 0.37 | 0.14 |
○Loop 861 - par_multi_interp.c:1612-1615 - exec | 0.04 | 0.01 |
○Loop 854 - par_multi_interp.c:1659-1660 - exec | 0 | 0 |
○Loop 856 - par_multi_interp.c:1657-1658 - exec | 0 | 0 |
○Loop 859 - par_multi_interp.c:1633-1636 - exec | 0 | 0 |
○Loop 862 - par_multi_interp.c:1612-1615 - exec | 0 | 0 |
○Loop 855 - par_multi_interp.c:1657-1658 - exec | 0 | 0 |
○Loop 853 - par_multi_interp.c:1659-1660 - exec | 0 | 0 |
○Loop 857 - par_multi_interp.c:1622-1652 - exec | 0 | 0 |
○Loop 858 - par_multi_interp.c:1633-1636 - exec | 0 | 0 |