Function: hypre_BoomerAMGBuildMultipass.extracted.27 | Module: exec | Source: par_multi_interp.c:1575-1663 [...] | Coverage: 0.59% |
---|
Function: hypre_BoomerAMGBuildMultipass.extracted.27 | Module: exec | Source: par_multi_interp.c:1575-1663 [...] | Coverage: 0.59% |
---|
/home/kcamus/qaas_runs/169-443-9681/intel/AMG/build/AMG/AMG/parcsr_ls/par_multi_interp.c: 1575 - 1663 |
-------------------------------------------------------------------------------- |
1575: #pragma omp parallel private(thread_start,thread_stop,my_thread_num,num_threads,k,k1,i,i1,j,j1,sum_C,sum_N,j_start,j_end,cnt,tmp_marker,tmp_marker_offd,cnt_offd,diagonal,alfa) |
[...] |
1585: if (n_fine) |
1586: { tmp_marker = hypre_CTAlloc(HYPRE_Int,n_fine); } |
1587: tmp_marker_offd = NULL; |
1588: if (num_cols_offd) |
1589: { tmp_marker_offd = hypre_CTAlloc(HYPRE_Int,num_cols_offd); } |
1590: for (i=0; i < n_fine; i++) |
1591: { tmp_marker[i] = -1; } |
1592: for (i=0; i < num_cols_offd; i++) |
1593: { tmp_marker_offd[i] = -1; } |
1594: |
1595: /* Compute this thread's range of pass_length */ |
1596: my_thread_num = hypre_GetThreadNum(); |
1597: num_threads = hypre_NumActiveThreads(); |
1598: thread_start = pass_pointer[1] + (pass_length/num_threads)*my_thread_num; |
1599: if (my_thread_num == num_threads-1) |
[...] |
1605: for (i=thread_start; i < thread_stop; i++) |
1606: { |
1607: i1 = pass_array[i]; |
1608: sum_C = 0; |
1609: sum_N = 0; |
1610: j_start = P_diag_start[i1]; |
1611: j_end = j_start+P_diag_i[i1+1]-P_diag_i[i1]; |
1612: for (j=j_start; j < j_end; j++) |
1613: { |
1614: k1 = P_diag_pass[1][j]; |
1615: tmp_marker[C_array[k1]] = i1; |
1616: } |
1617: cnt = P_diag_i[i1]; |
1618: for (j=A_diag_i[i1]+1; j < A_diag_i[i1+1]; j++) |
1619: { |
1620: j1 = A_diag_j[j]; |
1621: if (CF_marker[j1] != -3 && |
1622: (num_functions == 1 || dof_func[i1] == dof_func[j1])) |
1623: sum_N += A_diag_data[j]; |
1624: if (j1 != -1 && tmp_marker[j1] == i1) |
1625: { |
1626: P_diag_data[cnt] = A_diag_data[j]; |
1627: P_diag_j[cnt++] = fine_to_coarse[j1]; |
1628: sum_C += A_diag_data[j]; |
1629: } |
1630: } |
1631: j_start = P_offd_start[i1]; |
1632: j_end = j_start+P_offd_i[i1+1]-P_offd_i[i1]; |
1633: for (j=j_start; j < j_end; j++) |
1634: { |
1635: k1 = P_offd_pass[1][j]; |
1636: tmp_marker_offd[C_array_offd[k1]] = i1; |
1637: } |
1638: cnt_offd = P_offd_i[i1]; |
1639: for (j=A_offd_i[i1]; j < A_offd_i[i1+1]; j++) |
1640: { |
1641: if (col_offd_S_to_A) |
1642: j1 = map_A_to_S[A_offd_j[j]]; |
1643: else |
1644: j1 = A_offd_j[j]; |
1645: if (CF_marker_offd[j1] != -3 && |
1646: (num_functions == 1 || dof_func[i1] == dof_func_offd[j1])) |
1647: sum_N += A_offd_data[j]; |
1648: if (j1 != -1 && tmp_marker_offd[j1] == i1) |
1649: { |
1650: P_offd_data[cnt_offd] = A_offd_data[j]; |
1651: P_offd_j[cnt_offd++] = map_S_to_new[j1]; |
1652: sum_C += A_offd_data[j]; |
1653: } |
1654: } |
1655: diagonal = A_diag_data[A_diag_i[i1]]; |
1656: if (sum_C*diagonal) alfa = -sum_N/(sum_C*diagonal); |
1657: for (j=P_diag_i[i1]; j < cnt; j++) |
1658: P_diag_data[j] *= alfa; |
1659: for (j=P_offd_i[i1]; j < cnt_offd; j++) |
1660: P_offd_data[j] *= alfa; |
1661: } |
1662: hypre_TFree(tmp_marker); |
1663: hypre_TFree(tmp_marker_offd); |
0x442250 PUSH %RBP |
0x442251 MOV %RSP,%RBP |
0x442254 PUSH %R15 |
0x442256 PUSH %R14 |
0x442258 PUSH %R13 |
0x44225a PUSH %R12 |
0x44225c PUSH %RBX |
0x44225d SUB $0xe8,%RSP |
0x442264 MOV %R9,-0xd0(%RBP) |
0x44226b MOV %R8,-0x40(%RBP) |
0x44226f MOV %RCX,-0xb0(%RBP) |
0x442276 MOV %RDX,-0x50(%RBP) |
0x44227a MOV 0xe8(%RBP),%RAX |
0x442281 MOV %RAX,-0x38(%RBP) |
0x442285 MOV 0xe0(%RBP),%RAX |
0x44228c MOV %RAX,-0x48(%RBP) |
0x442290 MOV 0xd8(%RBP),%RDI |
0x442297 MOV 0xd0(%RBP),%RAX |
0x44229e MOV %RAX,-0x110(%RBP) |
0x4422a5 MOV 0xc8(%RBP),%RAX |
0x4422ac MOV %RAX,-0x108(%RBP) |
0x4422b3 MOV 0xc0(%RBP),%RAX |
0x4422ba MOV %RAX,-0xc8(%RBP) |
0x4422c1 MOV 0xb8(%RBP),%RAX |
0x4422c8 MOV %RAX,-0xc0(%RBP) |
0x4422cf MOV 0xb0(%RBP),%RAX |
0x4422d6 MOV %RAX,-0xe8(%RBP) |
0x4422dd MOV 0xa8(%RBP),%RAX |
0x4422e4 MOV %RAX,-0xe0(%RBP) |
0x4422eb MOV 0xa0(%RBP),%RAX |
0x4422f2 MOV %RAX,-0x58(%RBP) |
0x4422f6 MOV 0x98(%RBP),%RAX |
0x4422fd MOV %RAX,-0xd8(%RBP) |
0x442304 MOV 0x90(%RBP),%R15 |
0x44230b MOV 0x88(%RBP),%R12 |
0x442312 MOV 0x80(%RBP),%RAX |
0x442319 MOV %RAX,-0xf0(%RBP) |
0x442320 MOV 0x78(%RBP),%RAX |
0x442324 MOV %RAX,-0x90(%RBP) |
0x44232b MOV 0x70(%RBP),%RAX |
0x44232f MOV %RAX,-0x100(%RBP) |
0x442336 MOV 0x68(%RBP),%RAX |
0x44233a MOV %RAX,-0x68(%RBP) |
0x44233e MOV 0x60(%RBP),%RAX |
0x442342 MOV %RAX,-0xa0(%RBP) |
0x442349 MOV 0x58(%RBP),%RAX |
0x44234d MOV %RAX,-0xf8(%RBP) |
0x442354 MOV 0x50(%RBP),%RAX |
0x442358 MOV %RAX,-0x60(%RBP) |
0x44235c MOV 0x48(%RBP),%RAX |
0x442360 MOV %RAX,-0x78(%RBP) |
0x442364 MOV 0x40(%RBP),%RCX |
0x442368 MOV 0x38(%RBP),%RAX |
0x44236c MOV %RAX,-0xb8(%RBP) |
0x442373 MOV 0x30(%RBP),%RAX |
0x442377 MOV %RAX,-0x98(%RBP) |
0x44237e MOV 0x28(%RBP),%RAX |
0x442382 MOV %RAX,-0xa8(%RBP) |
0x442389 MOV 0x20(%RBP),%RAX |
0x44238d MOV %RAX,-0x88(%RBP) |
0x442394 MOV 0x18(%RBP),%RAX |
0x442398 MOV %RAX,-0x80(%RBP) |
0x44239c MOV 0x10(%RBP),%RAX |
0x4423a0 MOV %RAX,-0x70(%RBP) |
0x4423a4 TEST %RDI,%RDI |
0x4423a7 MOV %RCX,-0x30(%RBP) |
0x4423ab JE 442444 |
0x4423b1 MOV $0x8,%ESI |
0x4423b6 MOV %RDI,%RBX |
0x4423b9 CALL 4dd8f0 <hypre_CAlloc> |
0x4423be MOV -0x30(%RBP),%RCX |
0x4423c2 MOV %RAX,%R13 |
0x4423c5 TEST %RCX,%RCX |
0x4423c8 JE 442453 |
0x4423ce MOV $0x8,%ESI |
0x4423d3 MOV %RCX,%RDI |
0x4423d6 CALL 4dd8f0 <hypre_CAlloc> |
0x4423db MOV %RAX,%R14 |
0x4423de TEST %RBX,%RBX |
0x4423e1 JLE 4423f7 |
0x4423e3 SAL $0x3,%RBX |
0x4423e7 MOV %R13,%RDI |
0x4423ea MOV $0xff,%ESI |
0x4423ef MOV %RBX,%RDX |
0x4423f2 CALL 4e6020 <_intel_fast_memset> |
0x4423f7 MOV -0x30(%RBP),%RDX |
0x4423fb TEST %RDX,%RDX |
0x4423fe JLE 442411 |
0x442400 SAL $0x3,%RDX |
0x442404 MOV %R14,%RDI |
0x442407 MOV $0xff,%ESI |
0x44240c CALL 4e6020 <_intel_fast_memset> |
0x442411 CALL 4df9d0 <hypre_GetThreadNum> |
0x442416 MOV %RAX,-0x30(%RBP) |
0x44241a CALL 4df9c0 <hypre_NumActiveThreads> |
0x44241f MOV %RAX,%RCX |
0x442422 MOV -0x58(%RBP),%RAX |
0x442426 MOV 0x8(%RAX),%R10 |
0x44242a MOV -0x38(%RBP),%RBX |
0x44242e MOV %RBX,%RAX |
0x442431 OR %RCX,%RAX |
0x442434 SHR $0x20,%RAX |
0x442438 JE 44245d |
0x44243a MOV %RBX,%RAX |
0x44243d CQTO |
0x44243f IDIV %RCX |
0x442442 JMP 442463 |
0x442444 MOV %RDI,%RBX |
0x442447 XOR %R13D,%R13D |
0x44244a TEST %RCX,%RCX |
0x44244d JNE 4423ce |
0x442453 XOR %R14D,%R14D |
0x442456 TEST %RBX,%RBX |
0x442459 JG 4423e3 |
0x44245b JMP 4423f7 |
0x44245d MOV %EBX,%EAX |
0x44245f XOR %EDX,%EDX |
0x442461 DIV %ECX |
0x442463 MOV -0x40(%RBP),%R11 |
0x442467 MOV -0x50(%RBP),%R8 |
0x44246b MOV -0x48(%RBP),%R9 |
0x44246f MOV %RAX,%RDX |
0x442472 MOV -0x30(%RBP),%RSI |
0x442476 IMUL %RSI,%RDX |
0x44247a DEC %RCX |
0x44247d LEA 0x1(%RSI),%RDI |
0x442481 IMUL %RAX,%RDI |
0x442485 CMP %RCX,%RSI |
0x442488 CMOVE %RBX,%RDI |
0x44248c MOV %RDI,-0x38(%RBP) |
0x442490 CMP %RDI,%RDX |
0x442493 JGE 442997 |
0x442499 ADD %R10,-0x38(%RBP) |
0x44249d ADD %R10,%RDX |
0x4424a0 VMOVDDUP 0xb3618(%RIP),%XMM0 |
0x4424a8 VXORPD %XMM1,%XMM1,%XMM1 |
0x4424ac JMP 4424c9 |
0x4424ae XCHG %AX,%AX |
(836) 0x4424b0 MOV -0x58(%RBP),%RDX |
(836) 0x4424b4 INC %RDX |
(836) 0x4424b7 CMP -0x38(%RBP),%RDX |
(836) 0x4424bb MOV -0x50(%RBP),%R8 |
(836) 0x4424bf MOV -0x48(%RBP),%R9 |
(836) 0x4424c3 JGE 442997 |
(836) 0x4424c9 MOV -0xd8(%RBP),%RAX |
(836) 0x4424d0 MOV %RDX,-0x58(%RBP) |
(836) 0x4424d4 MOV (%RAX,%RDX,8),%RAX |
(836) 0x4424d8 MOV -0xe0(%RBP),%RCX |
(836) 0x4424df MOV (%RCX,%RAX,8),%RDX |
(836) 0x4424e3 MOV -0x60(%RBP),%RCX |
(836) 0x4424e7 MOV (%RCX,%RAX,8),%RSI |
(836) 0x4424eb MOV 0x8(%RCX,%RAX,8),%RCX |
(836) 0x4424f0 LEA (%RCX,%RDX,1),%RDI |
(836) 0x4424f4 SUB %RSI,%RDI |
(836) 0x4424f7 CMP %RDI,%RDX |
(836) 0x4424fa JGE 4425c5 |
(836) 0x442500 MOV -0xc0(%RBP),%RDI |
(836) 0x442507 MOV 0x8(%RDI),%R10 |
(836) 0x44250b SUB %RSI,%RCX |
(836) 0x44250e CMP $0x8,%RCX |
(836) 0x442512 JB 4425a0 |
(836) 0x442518 MOV %RCX,%RDI |
(836) 0x44251b SHR $0x3,%RDI |
(836) 0x44251f LEA (%R10,%RDX,8),%RSI |
(836) 0x442523 ADD $0x38,%RSI |
(836) 0x442527 NOPW (%RAX,%RAX,1) |
(846) 0x442530 MOV -0x38(%RSI),%RBX |
(846) 0x442534 MOV (%R12,%RBX,8),%RBX |
(846) 0x442538 MOV %RAX,(%R13,%RBX,8) |
(846) 0x44253d MOV -0x30(%RSI),%RBX |
(846) 0x442541 MOV (%R12,%RBX,8),%RBX |
(846) 0x442545 MOV %RAX,(%R13,%RBX,8) |
(846) 0x44254a MOV -0x28(%RSI),%RBX |
(846) 0x44254e MOV (%R12,%RBX,8),%RBX |
(846) 0x442552 MOV %RAX,(%R13,%RBX,8) |
(846) 0x442557 MOV -0x20(%RSI),%RBX |
(846) 0x44255b MOV (%R12,%RBX,8),%RBX |
(846) 0x44255f MOV %RAX,(%R13,%RBX,8) |
(846) 0x442564 MOV -0x18(%RSI),%RBX |
(846) 0x442568 MOV (%R12,%RBX,8),%RBX |
(846) 0x44256c MOV %RAX,(%R13,%RBX,8) |
(846) 0x442571 MOV -0x10(%RSI),%RBX |
(846) 0x442575 MOV (%R12,%RBX,8),%RBX |
(846) 0x442579 MOV %RAX,(%R13,%RBX,8) |
(846) 0x44257e MOV -0x8(%RSI),%RBX |
(846) 0x442582 MOV (%R12,%RBX,8),%RBX |
(846) 0x442586 MOV %RAX,(%R13,%RBX,8) |
(846) 0x44258b MOV (%RSI),%RBX |
(846) 0x44258e MOV (%R12,%RBX,8),%RBX |
(846) 0x442592 MOV %RAX,(%R13,%RBX,8) |
(846) 0x442597 ADD $0x40,%RSI |
(846) 0x44259b DEC %RDI |
(846) 0x44259e JNE 442530 |
(836) 0x4425a0 MOV %RCX,%RSI |
(836) 0x4425a3 AND $-0x8,%RSI |
(836) 0x4425a7 CMP %RCX,%RSI |
(836) 0x4425aa JAE 4425c5 |
(836) 0x4425ac LEA (%R10,%RDX,8),%RDX |
(845) 0x4425b0 MOV (%RDX,%RSI,8),%RDI |
(845) 0x4425b4 MOV (%R12,%RDI,8),%RDI |
(845) 0x4425b8 MOV %RAX,(%R13,%RDI,8) |
(845) 0x4425bd INC %RSI |
(845) 0x4425c0 CMP %RSI,%RCX |
(845) 0x4425c3 JNE 4425b0 |
(836) 0x4425c5 MOV -0x60(%RBP),%RCX |
(836) 0x4425c9 MOV (%RCX,%RAX,8),%RSI |
(836) 0x4425cd MOV -0x80(%RBP),%RDX |
(836) 0x4425d1 MOV (%RDX,%RAX,8),%RCX |
(836) 0x4425d5 MOV 0x8(%RDX,%RAX,8),%RDX |
(836) 0x4425da INC %RCX |
(836) 0x4425dd VXORPD %XMM4,%XMM4,%XMM4 |
(836) 0x4425e1 CMP %RDX,%RCX |
(836) 0x4425e4 MOV %RSI,-0x30(%RBP) |
(836) 0x4425e8 VXORPD %XMM3,%XMM3,%XMM3 |
(836) 0x4425ec JGE 442690 |
(836) 0x4425f2 VXORPD %XMM4,%XMM4,%XMM4 |
(836) 0x4425f6 MOV -0x88(%RBP),%RDI |
(836) 0x4425fd MOV -0x70(%RBP),%R10 |
(836) 0x442601 JMP 442618 |
0x442603 NOPW %CS:(%RAX,%RAX,1) |
(844) 0x442610 INC %RCX |
(844) 0x442613 CMP %RDX,%RCX |
(844) 0x442616 JGE 442694 |
(844) 0x442618 MOV (%RDI,%RCX,8),%RSI |
(844) 0x44261c CMPQ $-0x3,(%R8,%RSI,8) |
(844) 0x442621 JE 442644 |
(844) 0x442623 CMPQ $0x1,-0xb0(%RBP) |
(844) 0x44262b JE 44263e |
(844) 0x44262d MOV (%R11,%RAX,8),%RDI |
(844) 0x442631 CMP (%R11,%RSI,8),%RDI |
(844) 0x442635 MOV -0x88(%RBP),%RDI |
(844) 0x44263c JNE 442644 |
(844) 0x44263e VADDSD (%R10,%RCX,8),%XMM3,%XMM3 |
(844) 0x442644 CMP $-0x1,%RSI |
(844) 0x442648 JE 442610 |
(844) 0x44264a CMP %RAX,(%R13,%RSI,8) |
(844) 0x44264f JNE 442610 |
(844) 0x442651 VMOVSD (%R10,%RCX,8),%XMM5 |
(844) 0x442657 MOV -0x78(%RBP),%RDX |
(844) 0x44265b MOV -0x30(%RBP),%RBX |
(844) 0x44265f VMOVSD %XMM5,(%RDX,%RBX,8) |
(844) 0x442664 MOV (%R9,%RSI,8),%RDX |
(844) 0x442668 MOV -0xf8(%RBP),%RSI |
(844) 0x44266f MOV %RDX,(%RSI,%RBX,8) |
(844) 0x442673 INC %RBX |
(844) 0x442676 MOV %RBX,-0x30(%RBP) |
(844) 0x44267a VADDSD (%R10,%RCX,8),%XMM4,%XMM4 |
(844) 0x442680 MOV -0x80(%RBP),%RDX |
(844) 0x442684 MOV 0x8(%RDX,%RAX,8),%RDX |
(844) 0x442689 JMP 442610 |
0x44268b NOPL (%RAX,%RAX,1) |
(836) 0x442690 MOV -0x70(%RBP),%R10 |
(836) 0x442694 MOV -0xe8(%RBP),%RCX |
(836) 0x44269b MOV (%RCX,%RAX,8),%RDX |
(836) 0x44269f MOV -0x68(%RBP),%RCX |
(836) 0x4426a3 MOV (%RCX,%RAX,8),%RSI |
(836) 0x4426a7 MOV 0x8(%RCX,%RAX,8),%RCX |
(836) 0x4426ac LEA (%RCX,%RDX,1),%RDI |
(836) 0x4426b0 SUB %RSI,%RDI |
(836) 0x4426b3 CMP %RDI,%RDX |
(836) 0x4426b6 JGE 442774 |
(836) 0x4426bc MOV -0xc8(%RBP),%RDI |
(836) 0x4426c3 MOV 0x8(%RDI),%R11 |
(836) 0x4426c7 SUB %RSI,%RCX |
(836) 0x4426ca CMP $0x8,%RCX |
(836) 0x4426ce JB 442748 |
(836) 0x4426d0 MOV %RCX,%RDI |
(836) 0x4426d3 SHR $0x3,%RDI |
(836) 0x4426d7 LEA (%R11,%RDX,8),%RSI |
(836) 0x4426db ADD $0x38,%RSI |
(836) 0x4426df NOP |
(843) 0x4426e0 MOV -0x38(%RSI),%RBX |
(843) 0x4426e4 MOV (%R15,%RBX,8),%RBX |
(843) 0x4426e8 MOV %RAX,(%R14,%RBX,8) |
(843) 0x4426ec MOV -0x30(%RSI),%RBX |
(843) 0x4426f0 MOV (%R15,%RBX,8),%RBX |
(843) 0x4426f4 MOV %RAX,(%R14,%RBX,8) |
(843) 0x4426f8 MOV -0x28(%RSI),%RBX |
(843) 0x4426fc MOV (%R15,%RBX,8),%RBX |
(843) 0x442700 MOV %RAX,(%R14,%RBX,8) |
(843) 0x442704 MOV -0x20(%RSI),%RBX |
(843) 0x442708 MOV (%R15,%RBX,8),%RBX |
(843) 0x44270c MOV %RAX,(%R14,%RBX,8) |
(843) 0x442710 MOV -0x18(%RSI),%RBX |
(843) 0x442714 MOV (%R15,%RBX,8),%RBX |
(843) 0x442718 MOV %RAX,(%R14,%RBX,8) |
(843) 0x44271c MOV -0x10(%RSI),%RBX |
(843) 0x442720 MOV (%R15,%RBX,8),%RBX |
(843) 0x442724 MOV %RAX,(%R14,%RBX,8) |
(843) 0x442728 MOV -0x8(%RSI),%RBX |
(843) 0x44272c MOV (%R15,%RBX,8),%RBX |
(843) 0x442730 MOV %RAX,(%R14,%RBX,8) |
(843) 0x442734 MOV (%RSI),%RBX |
(843) 0x442737 MOV (%R15,%RBX,8),%RBX |
(843) 0x44273b MOV %RAX,(%R14,%RBX,8) |
(843) 0x44273f ADD $0x40,%RSI |
(843) 0x442743 DEC %RDI |
(843) 0x442746 JNE 4426e0 |
(836) 0x442748 MOV %RCX,%RSI |
(836) 0x44274b AND $-0x8,%RSI |
(836) 0x44274f CMP %RCX,%RSI |
(836) 0x442752 JAE 442774 |
(836) 0x442754 LEA (%R11,%RDX,8),%RDX |
(836) 0x442758 NOPL (%RAX,%RAX,1) |
(842) 0x442760 MOV (%RDX,%RSI,8),%RDI |
(842) 0x442764 MOV (%R15,%RDI,8),%RDI |
(842) 0x442768 MOV %RAX,(%R14,%RDI,8) |
(842) 0x44276c INC %RSI |
(842) 0x44276f CMP %RSI,%RCX |
(842) 0x442772 JNE 442760 |
(836) 0x442774 MOV -0x68(%RBP),%RCX |
(836) 0x442778 MOV (%RCX,%RAX,8),%R9 |
(836) 0x44277c MOV -0x98(%RBP),%RCX |
(836) 0x442783 MOV (%RCX,%RAX,8),%RSI |
(836) 0x442787 MOV 0x8(%RCX,%RAX,8),%RCX |
(836) 0x44278c CMP %RCX,%RSI |
(836) 0x44278f JGE 442870 |
(836) 0x442795 MOV -0xb8(%RBP),%RDX |
(836) 0x44279c LEA (%RDX,%RSI,8),%RDI |
(836) 0x4427a0 MOV -0xd0(%RBP),%R8 |
(836) 0x4427a7 MOV -0x90(%RBP),%R11 |
(836) 0x4427ae JMP 4427c0 |
(841) 0x4427b0 INC %RSI |
(841) 0x4427b3 ADD $0x8,%RDI |
(841) 0x4427b7 CMP %RCX,%RSI |
(841) 0x4427ba JGE 442870 |
(841) 0x4427c0 MOV %RDI,%RDX |
(841) 0x4427c3 TEST %R8,%R8 |
(841) 0x4427c6 JE 4427d6 |
(841) 0x4427c8 MOV (%RDI),%RDX |
(841) 0x4427cb MOV -0x110(%RBP),%RBX |
(841) 0x4427d2 LEA (%RBX,%RDX,8),%RDX |
(841) 0x4427d6 MOV (%RDX),%RBX |
(841) 0x4427d9 CMPQ $-0x3,(%R11,%RBX,8) |
(841) 0x4427de JE 442812 |
(841) 0x4427e0 CMPQ $0x1,-0xb0(%RBP) |
(841) 0x4427e8 JE 442806 |
(841) 0x4427ea MOV -0x40(%RBP),%RDX |
(841) 0x4427ee MOV (%RDX,%RAX,8),%RDX |
(841) 0x4427f2 MOV -0xf0(%RBP),%R11 |
(841) 0x4427f9 CMP (%R11,%RBX,8),%RDX |
(841) 0x4427fd MOV -0x90(%RBP),%R11 |
(841) 0x442804 JNE 442812 |
(841) 0x442806 MOV -0xa8(%RBP),%RDX |
(841) 0x44280d VADDSD (%RDX,%RSI,8),%XMM3,%XMM3 |
(841) 0x442812 CMP $-0x1,%RBX |
(841) 0x442816 JE 4427b0 |
(841) 0x442818 CMP %RAX,(%R14,%RBX,8) |
(841) 0x44281c JNE 4427b0 |
(841) 0x44281e MOV -0xa8(%RBP),%R10 |
(841) 0x442825 VMOVSD (%R10,%RSI,8),%XMM5 |
(841) 0x44282b MOV -0xa0(%RBP),%RCX |
(841) 0x442832 VMOVSD %XMM5,(%RCX,%R9,8) |
(841) 0x442838 MOV -0x108(%RBP),%RCX |
(841) 0x44283f MOV (%RCX,%RBX,8),%RCX |
(841) 0x442843 MOV -0x100(%RBP),%RDX |
(841) 0x44284a MOV %RCX,(%RDX,%R9,8) |
(841) 0x44284e INC %R9 |
(841) 0x442851 VADDSD (%R10,%RSI,8),%XMM4,%XMM4 |
(841) 0x442857 MOV -0x70(%RBP),%R10 |
(841) 0x44285b MOV -0x98(%RBP),%RCX |
(841) 0x442862 MOV 0x8(%RCX,%RAX,8),%RCX |
(841) 0x442867 JMP 4427b0 |
0x44286c NOPL (%RAX) |
(836) 0x442870 MOV -0x80(%RBP),%RCX |
(836) 0x442874 MOV (%RCX,%RAX,8),%RCX |
(836) 0x442878 VMULSD (%R10,%RCX,8),%XMM4,%XMM4 |
(836) 0x44287e VUCOMISD %XMM1,%XMM4 |
(836) 0x442882 JE 44288c |
(836) 0x442884 VXORPD %XMM0,%XMM3,%XMM2 |
(836) 0x442888 VDIVSD %XMM4,%XMM2,%XMM2 |
(836) 0x44288c MOV -0x60(%RBP),%RCX |
(836) 0x442890 MOV (%RCX,%RAX,8),%R11 |
(836) 0x442894 MOV -0x30(%RBP),%RSI |
(836) 0x442898 MOV %RSI,%R10 |
(836) 0x44289b SUB %R11,%R10 |
(836) 0x44289e MOV -0xa0(%RBP),%RDI |
(836) 0x4428a5 MOV -0x78(%RBP),%RDX |
(836) 0x4428a9 JLE 442912 |
(836) 0x4428ab MOV %R10,%RCX |
(836) 0x4428ae AND $-0x4,%RCX |
(836) 0x4428b2 JE 4428f0 |
(836) 0x4428b4 LEA -0x1(%RCX),%RBX |
(836) 0x4428b8 VBROADCASTSD %XMM2,%YMM3 |
(836) 0x4428bd LEA (%RDX,%R11,8),%RDX |
(836) 0x4428c1 XOR %R8D,%R8D |
(836) 0x4428c4 NOPW %CS:(%RAX,%RAX,1) |
(840) 0x4428d0 VMULPD (%RDX,%R8,8),%YMM3,%YMM4 |
(840) 0x4428d6 VMOVUPD %YMM4,(%RDX,%R8,8) |
(840) 0x4428dc ADD $0x4,%R8 |
(840) 0x4428e0 CMP %RBX,%R8 |
(840) 0x4428e3 JBE 4428d0 |
(836) 0x4428e5 CMP %RCX,%R10 |
(836) 0x4428e8 MOV -0x78(%RBP),%RDX |
(836) 0x4428ec JNE 4428f2 |
(836) 0x4428ee JMP 442912 |
(836) 0x4428f0 XOR %ECX,%ECX |
(836) 0x4428f2 ADD %R11,%RCX |
(836) 0x4428f5 NOPW %CS:(%RAX,%RAX,1) |
(839) 0x442900 VMULSD (%RDX,%RCX,8),%XMM2,%XMM3 |
(839) 0x442905 VMOVSD %XMM3,(%RDX,%RCX,8) |
(839) 0x44290a INC %RCX |
(839) 0x44290d CMP %RCX,%RSI |
(839) 0x442910 JNE 442900 |
(836) 0x442912 MOV -0x68(%RBP),%RCX |
(836) 0x442916 MOV (%RCX,%RAX,8),%R8 |
(836) 0x44291a MOV %R9,%RDX |
(836) 0x44291d SUB %R8,%RDX |
(836) 0x442920 MOV -0x40(%RBP),%R11 |
(836) 0x442924 JLE 4424b0 |
(836) 0x44292a MOV %RDX,%RAX |
(836) 0x44292d AND $-0x4,%RAX |
(836) 0x442931 JE 44296e |
(836) 0x442933 LEA -0x1(%RAX),%RSI |
(836) 0x442937 VBROADCASTSD %XMM2,%YMM3 |
(836) 0x44293c LEA (%RDI,%R8,8),%RBX |
(836) 0x442940 XOR %ECX,%ECX |
(836) 0x442942 NOPW %CS:(%RAX,%RAX,1) |
(838) 0x442950 VMULPD (%RBX,%RCX,8),%YMM3,%YMM4 |
(838) 0x442955 VMOVUPD %YMM4,(%RBX,%RCX,8) |
(838) 0x44295a ADD $0x4,%RCX |
(838) 0x44295e CMP %RSI,%RCX |
(838) 0x442961 JBE 442950 |
(836) 0x442963 CMP %RAX,%RDX |
(836) 0x442966 JE 4424b0 |
(836) 0x44296c JMP 442970 |
(836) 0x44296e XOR %EAX,%EAX |
(836) 0x442970 ADD %R8,%RAX |
(836) 0x442973 NOPW %CS:(%RAX,%RAX,1) |
(837) 0x442980 VMULSD (%RDI,%RAX,8),%XMM2,%XMM3 |
(837) 0x442985 VMOVSD %XMM3,(%RDI,%RAX,8) |
(837) 0x44298a INC %RAX |
(837) 0x44298d CMP %RAX,%R9 |
(837) 0x442990 JNE 442980 |
(836) 0x442992 JMP 4424b0 |
0x442997 MOV %R13,%RDI |
0x44299a VZEROUPPER |
0x44299d CALL 4dd9d0 <hypre_Free> |
0x4429a2 MOV %R14,%RDI |
0x4429a5 ADD $0xe8,%RSP |
0x4429ac POP %RBX |
0x4429ad POP %R12 |
0x4429af POP %R13 |
0x4429b1 POP %R14 |
0x4429b3 POP %R15 |
0x4429b5 POP %RBP |
0x4429b6 JMP 4dd9d0 |
0x4429bb NOPL (%RAX,%RAX,1) |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►100.00+ | __kmp_invoke_microtask | libiomp5.so | |
○ | __kmp_fork_call | libiomp5.so | |
○ | __kmpc_fork_call | libiomp5.so | |
○ | hypre_BoomerAMGBuildMultipass | par_multi_interp.c:1575 | exec |
○ | hypre_BoomerAMGSetup | par_amg_setup.c:737 | exec |
○ | hypre_PCGSetup | pcg.c:234 | exec |
○ | main | amg.c:398 | exec |
○ | __libc_init_first | libc.so.6 |
Path / |
Source file and lines | par_multi_interp.c:1575-1663 |
Module | exec |
nb instructions | 154 |
nb uops | 230 |
loop length | 671 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 2 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 57 |
micro-operation queue | 57.50 cycles |
front end | 57.50 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | |
---|---|---|---|---|---|---|---|---|
uops | 28.00 | 28.00 | 30.17 | 29.83 | 45.00 | 28.00 | 28.00 | 30.00 |
cycles | 28.00 | 28.00 | 30.17 | 29.83 | 45.00 | 28.00 | 28.00 | 30.00 |
Cycles executing div or sqrt instructions | 30.00-96.00 |
FE+BE cycles | 42.17-96.62 |
Stall cycles | 1.95-56.40 |
SB full (events) | 2.70-13.99 |
Front-end | 57.50 |
Dispatch | 45.00 |
DIV/SQRT | 30.00-96.00 |
Overall L1 | 57.50-96.00 |
all | 1% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 3% |
all | 50% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 100% |
all | 2% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 6% |
all | 12% |
load | 12% |
store | 12% |
mul | 12% |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 18% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 25% |
all | 12% |
load | 12% |
store | 12% |
mul | 12% |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 9% |
other | 12% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
PUSH %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
SUB $0xe8,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %R9,-0xd0(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %R8,-0x40(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RCX,-0xb0(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RDX,-0x50(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0xe8(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x38(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0xe0(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x48(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0xd8(%RBP),%RDI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0xd0(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x110(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0xc8(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x108(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0xc0(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0xc8(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0xb8(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0xc0(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0xb0(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0xe8(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0xa8(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0xe0(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0xa0(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x58(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x98(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0xd8(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x90(%RBP),%R15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x88(%RBP),%R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x80(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0xf0(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x78(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x90(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x70(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x100(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x68(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x68(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x60(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0xa0(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x58(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0xf8(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x50(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x60(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x48(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x78(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x40(%RBP),%RCX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x38(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0xb8(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x30(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x98(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x28(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0xa8(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x20(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x88(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x18(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x80(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x10(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x70(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
TEST %RDI,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %RCX,-0x30(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
JE 442444 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV $0x8,%ESI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %RDI,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CALL 4dd8f0 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
MOV -0x30(%RBP),%RCX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
TEST %RCX,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 442453 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV $0x8,%ESI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %RCX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CALL 4dd8f0 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
MOV %RAX,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
TEST %RBX,%RBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JLE 4423f7 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
SAL $0x3,%RBX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
MOV %R13,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV $0xff,%ESI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %RBX,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CALL 4e6020 <_intel_fast_memset> | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
MOV -0x30(%RBP),%RDX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
TEST %RDX,%RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JLE 442411 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
SAL $0x3,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
MOV %R14,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV $0xff,%ESI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CALL 4e6020 <_intel_fast_memset> | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
CALL 4df9d0 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
MOV %RAX,-0x30(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
CALL 4df9c0 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
MOV %RAX,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV -0x58(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x8(%RAX),%R10 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV -0x38(%RBP),%RBX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RBX,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
OR %RCX,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
SHR $0x20,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
JE 44245d | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV %RBX,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CQTO | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
IDIV %RCX | 57 | 14.25 | 14.25 | 0 | 0 | 0 | 14.25 | 14.25 | 0 | 42-95 | 24-90 |
JMP 442463 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
MOV %RDI,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XOR %R13D,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
TEST %RCX,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JNE 4423ce | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
XOR %R14D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
TEST %RBX,%RBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JG 4423e3 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
JMP 4423f7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
MOV %EBX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
DIV %ECX | 10 | 2.50 | 2.50 | 0 | 0 | 0 | 2.50 | 2.50 | 0 | 26 | 6 |
MOV -0x40(%RBP),%R11 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV -0x50(%RBP),%R8 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV -0x48(%RBP),%R9 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV -0x30(%RBP),%RSI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
IMUL %RSI,%RDX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
DEC %RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
LEA 0x1(%RSI),%RDI | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
IMUL %RAX,%RDI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
CMP %RCX,%RSI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CMOVE %RBX,%RDI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
MOV %RDI,-0x38(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
CMP %RDI,%RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JGE 442997 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
ADD %R10,-0x38(%RBP) | 2 | 0.25 | 0.25 | 0.83 | 0.83 | 1 | 0.25 | 0.25 | 0.33 | 5 | 1 |
ADD %R10,%RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVDDUP 0xb3618(%RIP),%XMM0 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VXORPD %XMM1,%XMM1,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 4424c9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %R13,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VZEROUPPER | 4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 4dd9d0 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
MOV %R14,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
ADD $0xe8,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R13 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %RBP | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
JMP 4dd9d0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
Source file and lines | par_multi_interp.c:1575-1663 |
Module | exec |
nb instructions | 154 |
nb uops | 230 |
loop length | 671 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 2 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 57 |
micro-operation queue | 57.50 cycles |
front end | 57.50 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | |
---|---|---|---|---|---|---|---|---|
uops | 28.00 | 28.00 | 30.17 | 29.83 | 45.00 | 28.00 | 28.00 | 30.00 |
cycles | 28.00 | 28.00 | 30.17 | 29.83 | 45.00 | 28.00 | 28.00 | 30.00 |
Cycles executing div or sqrt instructions | 30.00-96.00 |
FE+BE cycles | 42.17-96.62 |
Stall cycles | 1.95-56.40 |
SB full (events) | 2.70-13.99 |
Front-end | 57.50 |
Dispatch | 45.00 |
DIV/SQRT | 30.00-96.00 |
Overall L1 | 57.50-96.00 |
all | 1% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 3% |
all | 50% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 100% |
all | 2% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 6% |
all | 12% |
load | 12% |
store | 12% |
mul | 12% |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 18% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 25% |
all | 12% |
load | 12% |
store | 12% |
mul | 12% |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 9% |
other | 12% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
PUSH %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
SUB $0xe8,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %R9,-0xd0(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %R8,-0x40(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RCX,-0xb0(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RDX,-0x50(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0xe8(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x38(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0xe0(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x48(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0xd8(%RBP),%RDI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0xd0(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x110(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0xc8(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x108(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0xc0(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0xc8(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0xb8(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0xc0(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0xb0(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0xe8(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0xa8(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0xe0(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0xa0(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x58(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x98(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0xd8(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x90(%RBP),%R15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x88(%RBP),%R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x80(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0xf0(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x78(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x90(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x70(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x100(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x68(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x68(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x60(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0xa0(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x58(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0xf8(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x50(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x60(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x48(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x78(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x40(%RBP),%RCX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x38(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0xb8(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x30(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x98(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x28(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0xa8(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x20(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x88(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x18(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x80(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x10(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x70(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
TEST %RDI,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %RCX,-0x30(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
JE 442444 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV $0x8,%ESI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %RDI,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CALL 4dd8f0 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
MOV -0x30(%RBP),%RCX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
TEST %RCX,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 442453 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV $0x8,%ESI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %RCX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CALL 4dd8f0 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
MOV %RAX,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
TEST %RBX,%RBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JLE 4423f7 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
SAL $0x3,%RBX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
MOV %R13,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV $0xff,%ESI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %RBX,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CALL 4e6020 <_intel_fast_memset> | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
MOV -0x30(%RBP),%RDX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
TEST %RDX,%RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JLE 442411 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
SAL $0x3,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
MOV %R14,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV $0xff,%ESI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CALL 4e6020 <_intel_fast_memset> | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
CALL 4df9d0 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
MOV %RAX,-0x30(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
CALL 4df9c0 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
MOV %RAX,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV -0x58(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x8(%RAX),%R10 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV -0x38(%RBP),%RBX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RBX,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
OR %RCX,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
SHR $0x20,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
JE 44245d | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV %RBX,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CQTO | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
IDIV %RCX | 57 | 14.25 | 14.25 | 0 | 0 | 0 | 14.25 | 14.25 | 0 | 42-95 | 24-90 |
JMP 442463 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
MOV %RDI,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XOR %R13D,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
TEST %RCX,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JNE 4423ce | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
XOR %R14D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
TEST %RBX,%RBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JG 4423e3 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
JMP 4423f7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
MOV %EBX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
DIV %ECX | 10 | 2.50 | 2.50 | 0 | 0 | 0 | 2.50 | 2.50 | 0 | 26 | 6 |
MOV -0x40(%RBP),%R11 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV -0x50(%RBP),%R8 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV -0x48(%RBP),%R9 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV -0x30(%RBP),%RSI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
IMUL %RSI,%RDX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
DEC %RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
LEA 0x1(%RSI),%RDI | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
IMUL %RAX,%RDI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
CMP %RCX,%RSI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CMOVE %RBX,%RDI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
MOV %RDI,-0x38(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
CMP %RDI,%RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JGE 442997 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
ADD %R10,-0x38(%RBP) | 2 | 0.25 | 0.25 | 0.83 | 0.83 | 1 | 0.25 | 0.25 | 0.33 | 5 | 1 |
ADD %R10,%RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVDDUP 0xb3618(%RIP),%XMM0 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VXORPD %XMM1,%XMM1,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 4424c9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %R13,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VZEROUPPER | 4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 4dd9d0 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
MOV %R14,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
ADD $0xe8,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R13 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %RBP | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
JMP 4dd9d0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼hypre_BoomerAMGBuildMultipass.extracted.27– | 0.59 | 0.22 |
▼Loop 836 - par_multi_interp.c:1585-1660 - exec– | 0.08 | 0.03 |
○Loop 844 - par_multi_interp.c:1618-1628 - exec | 0.46 | 0.17 |
○Loop 845 - par_multi_interp.c:1612-1615 - exec | 0.04 | 0.01 |
○Loop 837 - par_multi_interp.c:1659-1660 - exec | 0 | 0 |
○Loop 838 - par_multi_interp.c:1659-1660 - exec | 0 | 0 |
○Loop 840 - par_multi_interp.c:1657-1658 - exec | 0 | 0 |
○Loop 842 - par_multi_interp.c:1633-1636 - exec | 0 | 0 |
○Loop 846 - par_multi_interp.c:1612-1615 - exec | 0 | 0 |
○Loop 839 - par_multi_interp.c:1657-1658 - exec | 0 | 0 |
○Loop 841 - par_multi_interp.c:1622-1652 - exec | 0 | 0 |
○Loop 843 - par_multi_interp.c:1633-1636 - exec | 0 | 0 |