Function: hypre_ParMatmul.extracted.12 | Module: exec | Source: par_csr_matop.c:829-995 [...] | Coverage: 4.04% |
---|
Function: hypre_ParMatmul.extracted.12 | Module: exec | Source: par_csr_matop.c:829-995 [...] | Coverage: 4.04% |
---|
/home/kcamus/qaas_runs/169-443-9681/intel/AMG/build/AMG/AMG/parcsr_mv/par_csr_matop.c: 829 - 995 |
-------------------------------------------------------------------------------- |
829: #pragma omp parallel |
[...] |
840: ii = hypre_GetThreadNum(); |
841: num_threads = hypre_NumActiveThreads(); |
842: size = num_rows_diag_A/num_threads; |
843: rest = num_rows_diag_A - size*num_threads; |
844: if (ii < rest) |
[...] |
854: jj_count_diag = C_diag_i[ns]; |
855: jj_count_offd = C_offd_i[ns]; |
856: if (num_cols_diag_B || num_cols_offd_C) |
857: B_marker = hypre_CTAlloc(HYPRE_Int, num_cols_diag_B+num_cols_offd_C); |
858: for (i1 = 0; i1 < num_cols_diag_B+num_cols_offd_C; i1++) |
859: B_marker[i1] = -1; |
[...] |
865: for (i1 = ns; i1 < ne; i1++) |
[...] |
874: if ( allsquare ) |
875: { |
876: B_marker[i1] = jj_count_diag; |
877: C_diag_data[jj_count_diag] = zero; |
878: C_diag_j[jj_count_diag] = i1; |
879: jj_count_diag++; |
[...] |
886: if (num_cols_offd_A) |
887: { |
888: for (jj2 = A_offd_i[i1]; jj2 < A_offd_i[i1+1]; jj2++) |
889: { |
890: i2 = A_offd_j[jj2]; |
891: a_entry = A_offd_data[jj2]; |
[...] |
897: for (jj3 = B_ext_offd_i[i2]; jj3 < B_ext_offd_i[i2+1]; jj3++) |
898: { |
899: i3 = num_cols_diag_B+B_ext_offd_j[jj3]; |
[...] |
907: if (B_marker[i3] < jj_row_begin_offd) |
908: { |
909: B_marker[i3] = jj_count_offd; |
910: C_offd_data[jj_count_offd] = a_entry*B_ext_offd_data[jj3]; |
911: C_offd_j[jj_count_offd] = i3-num_cols_diag_B; |
912: jj_count_offd++; |
913: } |
914: else |
915: C_offd_data[B_marker[i3]] += a_entry*B_ext_offd_data[jj3]; |
916: } |
917: for (jj3 = B_ext_diag_i[i2]; jj3 < B_ext_diag_i[i2+1]; jj3++) |
918: { |
919: i3 = B_ext_diag_j[jj3]; |
920: if (B_marker[i3] < jj_row_begin_diag) |
921: { |
922: B_marker[i3] = jj_count_diag; |
923: C_diag_data[jj_count_diag] = a_entry*B_ext_diag_data[jj3]; |
924: C_diag_j[jj_count_diag] = i3; |
925: jj_count_diag++; |
926: } |
927: else |
928: C_diag_data[B_marker[i3]] += a_entry*B_ext_diag_data[jj3]; |
[...] |
937: for (jj2 = A_diag_i[i1]; jj2 < A_diag_i[i1+1]; jj2++) |
938: { |
939: i2 = A_diag_j[jj2]; |
940: a_entry = A_diag_data[jj2]; |
[...] |
946: for (jj3 = B_diag_i[i2]; jj3 < B_diag_i[i2+1]; jj3++) |
947: { |
948: i3 = B_diag_j[jj3]; |
[...] |
956: if (B_marker[i3] < jj_row_begin_diag) |
957: { |
958: B_marker[i3] = jj_count_diag; |
959: C_diag_data[jj_count_diag] = a_entry*B_diag_data[jj3]; |
960: C_diag_j[jj_count_diag] = i3; |
961: jj_count_diag++; |
962: } |
963: else |
964: { |
965: C_diag_data[B_marker[i3]] += a_entry*B_diag_data[jj3]; |
966: } |
967: } |
968: if (num_cols_offd_B) |
969: { |
970: for (jj3 = B_offd_i[i2]; jj3 < B_offd_i[i2+1]; jj3++) |
971: { |
972: i3 = num_cols_diag_B+map_B_to_C[B_offd_j[jj3]]; |
[...] |
980: if (B_marker[i3] < jj_row_begin_offd) |
981: { |
982: B_marker[i3] = jj_count_offd; |
983: C_offd_data[jj_count_offd] = a_entry*B_offd_data[jj3]; |
984: C_offd_j[jj_count_offd] = i3-num_cols_diag_B; |
985: jj_count_offd++; |
986: } |
987: else |
988: { |
989: C_offd_data[B_marker[i3]] += a_entry*B_offd_data[jj3]; |
[...] |
995: hypre_TFree(B_marker); |
0x4c8c80 PUSH %RBP |
0x4c8c81 MOV %RSP,%RBP |
0x4c8c84 PUSH %R15 |
0x4c8c86 PUSH %R14 |
0x4c8c88 PUSH %R13 |
0x4c8c8a PUSH %R12 |
0x4c8c8c PUSH %RBX |
0x4c8c8d SUB $0xf8,%RSP |
0x4c8c94 MOV %R9,-0xc0(%RBP) |
0x4c8c9b MOV %R8,-0xe0(%RBP) |
0x4c8ca2 MOV %RCX,-0x68(%RBP) |
0x4c8ca6 MOV %RDX,-0xd8(%RBP) |
0x4c8cad MOV 0xe0(%RBP),%RAX |
0x4c8cb4 MOV %RAX,-0xa0(%RBP) |
0x4c8cbb MOV 0xd8(%RBP),%RAX |
0x4c8cc2 MOV %RAX,-0xb0(%RBP) |
0x4c8cc9 MOV 0xd0(%RBP),%RAX |
0x4c8cd0 MOV %RAX,-0x80(%RBP) |
0x4c8cd4 MOV 0xc8(%RBP),%RAX |
0x4c8cdb MOV %RAX,-0x108(%RBP) |
0x4c8ce2 MOV 0xc0(%RBP),%RAX |
0x4c8ce9 MOV %RAX,-0xa8(%RBP) |
0x4c8cf0 MOV 0xb8(%RBP),%RAX |
0x4c8cf7 MOV %RAX,-0x78(%RBP) |
0x4c8cfb MOV 0xb0(%RBP),%RAX |
0x4c8d02 MOV %RAX,-0x100(%RBP) |
0x4c8d09 MOV 0xa8(%RBP),%RAX |
0x4c8d10 MOV %RAX,-0x40(%RBP) |
0x4c8d14 MOV 0xa0(%RBP),%RAX |
0x4c8d1b MOV %RAX,-0x90(%RBP) |
0x4c8d22 MOV 0x98(%RBP),%RAX |
0x4c8d29 MOV %RAX,-0x30(%RBP) |
0x4c8d2d MOV 0x90(%RBP),%R14 |
0x4c8d34 MOV 0x88(%RBP),%RAX |
0x4c8d3b MOV %RAX,-0x50(%RBP) |
0x4c8d3f MOV 0x80(%RBP),%R12 |
0x4c8d46 MOV 0x78(%RBP),%R13 |
0x4c8d4a MOV 0x70(%RBP),%RAX |
0x4c8d4e MOV %RAX,-0xf8(%RBP) |
0x4c8d55 MOV 0x68(%RBP),%RAX |
0x4c8d59 MOV %RAX,-0xd0(%RBP) |
0x4c8d60 MOV 0x60(%RBP),%RAX |
0x4c8d64 MOV %RAX,-0x58(%RBP) |
0x4c8d68 MOV 0x58(%RBP),%RAX |
0x4c8d6c MOV %RAX,-0xf0(%RBP) |
0x4c8d73 MOV 0x50(%RBP),%RAX |
0x4c8d77 MOV %RAX,-0x70(%RBP) |
0x4c8d7b MOV 0x48(%RBP),%RAX |
0x4c8d7f MOV %RAX,-0xe8(%RBP) |
0x4c8d86 MOV 0x40(%RBP),%RAX |
0x4c8d8a MOV %RAX,-0x118(%RBP) |
0x4c8d91 MOV 0x38(%RBP),%RAX |
0x4c8d95 MOV %RAX,-0x88(%RBP) |
0x4c8d9c MOV 0x30(%RBP),%RAX |
0x4c8da0 MOV %RAX,-0x110(%RBP) |
0x4c8da7 MOV 0x28(%RBP),%RAX |
0x4c8dab MOV %RAX,-0x98(%RBP) |
0x4c8db2 MOV 0x20(%RBP),%R15 |
0x4c8db6 MOV 0x18(%RBP),%RAX |
0x4c8dba MOV %RAX,-0xb8(%RBP) |
0x4c8dc1 MOV 0x10(%RBP),%RAX |
0x4c8dc5 MOV %RAX,-0x60(%RBP) |
0x4c8dc9 CALL 4f6100 <hypre_GetThreadNum> |
0x4c8dce MOV %RAX,%RBX |
0x4c8dd1 CALL 4f60f0 <hypre_NumActiveThreads> |
0x4c8dd6 MOV %RAX,%RCX |
0x4c8dd9 OR %R15,%RAX |
0x4c8ddc SHR $0x20,%RAX |
0x4c8de0 JE 4c8e00 |
0x4c8de2 MOV %R15,%RAX |
0x4c8de5 CQTO |
0x4c8de7 IDIV %RCX |
0x4c8dea MOV %RAX,%RSI |
0x4c8ded JMP 4c8e09 |
0x4c8def NOPW %CS:(%RAX,%RAX,1) |
0x4c8dfe XCHG %AX,%AX |
0x4c8e00 MOV %R15D,%EAX |
0x4c8e03 XOR %EDX,%EDX |
0x4c8e05 DIV %ECX |
0x4c8e07 MOV %EAX,%ESI |
0x4c8e09 LEA 0x1(%RBX),%RAX |
0x4c8e0d MOV %RSI,%R15 |
0x4c8e10 IMUL %RAX,%R15 |
0x4c8e14 LEA 0x1(%RSI),%RCX |
0x4c8e18 IMUL %RBX,%RCX |
0x4c8e1c IMUL %RBX,%RSI |
0x4c8e20 ADD %RDX,%RSI |
0x4c8e23 CMP %RDX,%RBX |
0x4c8e26 CMOVL %RCX,%RSI |
0x4c8e2a CMOVL %RAX,%RDX |
0x4c8e2e MOV %RDX,-0x48(%RBP) |
0x4c8e32 MOV (%R12),%RAX |
0x4c8e36 MOV (%RAX,%RSI,8),%R12 |
0x4c8e3a MOV -0x30(%RBP),%RAX |
0x4c8e3e MOV (%RAX),%RAX |
0x4c8e41 MOV (%RAX,%RSI,8),%RAX |
0x4c8e45 MOV %RAX,-0x30(%RBP) |
0x4c8e49 MOV -0x40(%RBP),%R8 |
0x4c8e4d MOV (%R8),%RAX |
0x4c8e50 XOR %ECX,%ECX |
0x4c8e52 MOV -0x58(%RBP),%RBX |
0x4c8e56 MOV %RBX,%RDX |
0x4c8e59 OR %RAX,%RDX |
0x4c8e5c MOV $0,%EDI |
0x4c8e61 JE 4c8e85 |
0x4c8e63 ADD %RBX,%RAX |
0x4c8e66 MOV %RSI,-0x38(%RBP) |
0x4c8e6a MOV $0x8,%ESI |
0x4c8e6f MOV %RAX,%RDI |
0x4c8e72 CALL 4f3da0 <hypre_CAlloc> |
0x4c8e77 MOV -0x40(%RBP),%R8 |
0x4c8e7b MOV -0x38(%RBP),%RSI |
0x4c8e7f MOV %RAX,%RDI |
0x4c8e82 MOV (%R8),%RCX |
0x4c8e85 ADD %R15,-0x48(%RBP) |
0x4c8e89 ADD %RBX,%RCX |
0x4c8e8c JLE 4c8ea6 |
0x4c8e8e XOR %EAX,%EAX |
(3360) 0x4c8e90 MOVQ $-0x1,(%RDI,%RAX,8) |
(3360) 0x4c8e98 INC %RAX |
(3360) 0x4c8e9b MOV (%R8),%RCX |
(3360) 0x4c8e9e ADD %RBX,%RCX |
(3360) 0x4c8ea1 CMP %RCX,%RAX |
(3360) 0x4c8ea4 JL 4c8e90 |
0x4c8ea6 CMP -0x48(%RBP),%RSI |
0x4c8eaa JGE 4c9240 |
0x4c8eb0 MOV %R12,%R15 |
0x4c8eb3 JMP 4c8ed1 |
0x4c8eb5 NOPW %CS:(%RAX,%RAX,1) |
(3353) 0x4c8ec0 MOV %R11,-0x30(%RBP) |
(3353) 0x4c8ec4 MOV %R15,%R12 |
(3353) 0x4c8ec7 CMP -0x48(%RBP),%RSI |
(3353) 0x4c8ecb JGE 4c9240 |
(3353) 0x4c8ed1 MOV %RSI,%RCX |
(3353) 0x4c8ed4 CMPQ $0,-0xa0(%RBP) |
(3353) 0x4c8edc MOV -0x50(%RBP),%RAX |
(3353) 0x4c8ee0 JE 4c8ef8 |
(3353) 0x4c8ee2 MOV %R12,(%RDI,%RCX,8) |
(3353) 0x4c8ee6 MOVQ $0,(%R13,%R12,8) |
(3353) 0x4c8eef MOV %RCX,(%RAX,%R12,8) |
(3353) 0x4c8ef3 LEA 0x1(%R12),%R15 |
(3353) 0x4c8ef8 CMPQ $0,-0x98(%RBP) |
(3353) 0x4c8f00 JE 4c9080 |
(3353) 0x4c8f06 MOV -0x60(%RBP),%RAX |
(3353) 0x4c8f0a MOV (%RAX,%RCX,8),%RDX |
(3353) 0x4c8f0e LEA 0x1(%RCX),%RSI |
(3353) 0x4c8f12 CMP 0x8(%RAX,%RCX,8),%RDX |
(3353) 0x4c8f17 JGE 4c9084 |
(3353) 0x4c8f1d MOV %RSI,-0x38(%RBP) |
(3353) 0x4c8f21 MOV -0x30(%RBP),%R11 |
(3353) 0x4c8f25 MOV %RCX,-0xc8(%RBP) |
(3353) 0x4c8f2c MOV -0x108(%RBP),%R9 |
(3353) 0x4c8f33 JMP 4c8f5d |
0x4c8f35 NOPW %CS:(%RAX,%RAX,1) |
(3357) 0x4c8f40 MOV -0x40(%RBP),%RDX |
(3357) 0x4c8f44 INC %RDX |
(3357) 0x4c8f47 MOV -0x60(%RBP),%RAX |
(3357) 0x4c8f4b MOV -0xc8(%RBP),%RCX |
(3357) 0x4c8f52 CMP 0x8(%RAX,%RCX,8),%RDX |
(3357) 0x4c8f57 JGE 4c90a0 |
(3357) 0x4c8f5d MOV -0xb8(%RBP),%RAX |
(3357) 0x4c8f64 MOV (%RAX,%RDX,8),%R10 |
(3357) 0x4c8f68 MOV -0xc0(%RBP),%RAX |
(3357) 0x4c8f6f MOV %RDX,-0x40(%RBP) |
(3357) 0x4c8f73 VMOVSD (%RAX,%RDX,8),%XMM0 |
(3357) 0x4c8f78 MOV -0x80(%RBP),%RAX |
(3357) 0x4c8f7c MOV (%RAX,%R10,8),%RSI |
(3357) 0x4c8f80 MOV 0x8(%RAX,%R10,8),%RBX |
(3357) 0x4c8f85 CMP %RBX,%RSI |
(3357) 0x4c8f88 JGE 4c8ff9 |
(3357) 0x4c8f8a MOV -0xb0(%RBP),%RAX |
(3357) 0x4c8f91 JMP 4c8fb4 |
0x4c8f93 NOPW %CS:(%RAX,%RAX,1) |
(3359) 0x4c8fa0 VADDSD (%R14,%RDX,8),%XMM1,%XMM1 |
(3359) 0x4c8fa6 VMOVSD %XMM1,(%R14,%RDX,8) |
(3359) 0x4c8fac INC %RSI |
(3359) 0x4c8faf CMP %RBX,%RSI |
(3359) 0x4c8fb2 JGE 4c8ff9 |
(3359) 0x4c8fb4 MOV (%RAX,%RSI,8),%RCX |
(3359) 0x4c8fb8 MOV -0x58(%RBP),%RDX |
(3359) 0x4c8fbc LEA (%RCX,%RDX,1),%R8 |
(3359) 0x4c8fc0 MOV (%RDI,%R8,8),%RDX |
(3359) 0x4c8fc4 VMULSD (%R9,%RSI,8),%XMM0,%XMM1 |
(3359) 0x4c8fca CMP -0x30(%RBP),%RDX |
(3359) 0x4c8fce JGE 4c8fa0 |
(3359) 0x4c8fd0 MOV %R11,(%RDI,%R8,8) |
(3359) 0x4c8fd4 VMOVSD %XMM1,(%R14,%R11,8) |
(3359) 0x4c8fda MOV -0x90(%RBP),%RDX |
(3359) 0x4c8fe1 MOV %RCX,(%RDX,%R11,8) |
(3359) 0x4c8fe5 INC %R11 |
(3359) 0x4c8fe8 MOV -0x80(%RBP),%RCX |
(3359) 0x4c8fec MOV 0x8(%RCX,%R10,8),%RBX |
(3359) 0x4c8ff1 INC %RSI |
(3359) 0x4c8ff4 CMP %RBX,%RSI |
(3359) 0x4c8ff7 JL 4c8fb4 |
(3357) 0x4c8ff9 MOV -0x78(%RBP),%RAX |
(3357) 0x4c8ffd MOV (%RAX,%R10,8),%RSI |
(3357) 0x4c9001 MOV 0x8(%RAX,%R10,8),%RBX |
(3357) 0x4c9006 CMP %RBX,%RSI |
(3357) 0x4c9009 JGE 4c8f40 |
(3357) 0x4c900f MOV -0xa8(%RBP),%R8 |
(3357) 0x4c9016 JMP 4c903a |
0x4c9018 NOPL (%RAX,%RAX,1) |
(3358) 0x4c9020 VADDSD (%R13,%RDX,8),%XMM1,%XMM1 |
(3358) 0x4c9027 VMOVSD %XMM1,(%R13,%RDX,8) |
(3358) 0x4c902e INC %RSI |
(3358) 0x4c9031 CMP %RBX,%RSI |
(3358) 0x4c9034 JGE 4c8f40 |
(3358) 0x4c903a MOV (%R8,%RSI,8),%RCX |
(3358) 0x4c903e MOV (%RDI,%RCX,8),%RDX |
(3358) 0x4c9042 MOV -0x100(%RBP),%RAX |
(3358) 0x4c9049 VMULSD (%RAX,%RSI,8),%XMM0,%XMM1 |
(3358) 0x4c904e CMP %R12,%RDX |
(3358) 0x4c9051 JGE 4c9020 |
(3358) 0x4c9053 MOV %R15,(%RDI,%RCX,8) |
(3358) 0x4c9057 VMOVSD %XMM1,(%R13,%R15,8) |
(3358) 0x4c905e MOV -0x50(%RBP),%RAX |
(3358) 0x4c9062 MOV %RCX,(%RAX,%R15,8) |
(3358) 0x4c9066 INC %R15 |
(3358) 0x4c9069 MOV -0x78(%RBP),%RCX |
(3358) 0x4c906d MOV 0x8(%RCX,%R10,8),%RBX |
(3358) 0x4c9072 INC %RSI |
(3358) 0x4c9075 CMP %RBX,%RSI |
(3358) 0x4c9078 JL 4c903a |
(3357) 0x4c907a JMP 4c8f40 |
0x4c907f NOP |
(3353) 0x4c9080 LEA 0x1(%RCX),%RSI |
(3353) 0x4c9084 MOV -0x30(%RBP),%R11 |
(3353) 0x4c9088 MOV -0x68(%RBP),%RAX |
(3353) 0x4c908c MOV (%RAX,%RCX,8),%R8 |
(3353) 0x4c9090 CMP (%RAX,%RSI,8),%R8 |
(3353) 0x4c9094 JGE 4c8ec0 |
(3353) 0x4c909a JMP 4c90b6 |
0x4c909c NOPL (%RAX) |
(3353) 0x4c90a0 MOV -0x38(%RBP),%RSI |
(3353) 0x4c90a4 MOV -0x68(%RBP),%RAX |
(3353) 0x4c90a8 MOV (%RAX,%RCX,8),%R8 |
(3353) 0x4c90ac CMP (%RAX,%RSI,8),%R8 |
(3353) 0x4c90b0 JGE 4c8ec0 |
(3353) 0x4c90b6 MOV %RSI,-0x38(%RBP) |
(3353) 0x4c90ba JMP 4c90d5 |
0x4c90bc NOPL (%RAX) |
(3354) 0x4c90c0 INC %R8 |
(3354) 0x4c90c3 MOV -0x68(%RBP),%RAX |
(3354) 0x4c90c7 MOV -0x38(%RBP),%RSI |
(3354) 0x4c90cb CMP (%RAX,%RSI,8),%R8 |
(3354) 0x4c90cf JGE 4c8ec0 |
(3354) 0x4c90d5 MOV -0xe0(%RBP),%RAX |
(3354) 0x4c90dc MOV (%RAX,%R8,8),%R9 |
(3354) 0x4c90e0 MOV -0xd8(%RBP),%RAX |
(3354) 0x4c90e7 VMOVSD (%RAX,%R8,8),%XMM0 |
(3354) 0x4c90ed MOV -0x88(%RBP),%RAX |
(3354) 0x4c90f4 MOV (%RAX,%R9,8),%RSI |
(3354) 0x4c90f8 MOV 0x8(%RAX,%R9,8),%RBX |
(3354) 0x4c90fd CMP %RBX,%RSI |
(3354) 0x4c9100 JGE 4c9175 |
(3354) 0x4c9102 MOV -0x50(%RBP),%R10 |
(3354) 0x4c9106 MOV -0x110(%RBP),%RAX |
(3354) 0x4c910d JMP 4c9136 |
0x4c910f NOPW %CS:(%RAX,%RAX,1) |
0x4c911e XCHG %AX,%AX |
(3356) 0x4c9120 VADDSD (%R13,%RDX,8),%XMM1,%XMM1 |
(3356) 0x4c9127 VMOVSD %XMM1,(%R13,%RDX,8) |
(3356) 0x4c912e INC %RSI |
(3356) 0x4c9131 CMP %RBX,%RSI |
(3356) 0x4c9134 JGE 4c9175 |
(3356) 0x4c9136 MOV -0x118(%RBP),%RCX |
(3356) 0x4c913d MOV (%RCX,%RSI,8),%RCX |
(3356) 0x4c9141 MOV (%RDI,%RCX,8),%RDX |
(3356) 0x4c9145 VMULSD (%RAX,%RSI,8),%XMM0,%XMM1 |
(3356) 0x4c914a CMP %R12,%RDX |
(3356) 0x4c914d JGE 4c9120 |
(3356) 0x4c914f MOV %R15,(%RDI,%RCX,8) |
(3356) 0x4c9153 VMOVSD %XMM1,(%R13,%R15,8) |
(3356) 0x4c915a MOV %RCX,(%R10,%R15,8) |
(3356) 0x4c915e INC %R15 |
(3356) 0x4c9161 MOV -0x88(%RBP),%RCX |
(3356) 0x4c9168 MOV 0x8(%RCX,%R9,8),%RBX |
(3356) 0x4c916d INC %RSI |
(3356) 0x4c9170 CMP %RBX,%RSI |
(3356) 0x4c9173 JL 4c9136 |
(3354) 0x4c9175 CMPQ $0,-0xd0(%RBP) |
(3354) 0x4c917d JE 4c90c0 |
(3354) 0x4c9183 MOV -0x70(%RBP),%RAX |
(3354) 0x4c9187 MOV (%RAX,%R9,8),%RSI |
(3354) 0x4c918b MOV 0x8(%RAX,%R9,8),%RBX |
(3354) 0x4c9190 CMP %RBX,%RSI |
(3354) 0x4c9193 JGE 4c90c0 |
(3354) 0x4c9199 MOV -0xe8(%RBP),%R10 |
(3354) 0x4c91a0 JMP 4c91d8 |
0x4c91a2 NOPW %CS:(%RAX,%RAX,1) |
0x4c91b1 NOPW %CS:(%RAX,%RAX,1) |
(3355) 0x4c91c0 VADDSD (%R14,%RAX,8),%XMM1,%XMM1 |
(3355) 0x4c91c6 VMOVSD %XMM1,(%R14,%RAX,8) |
(3355) 0x4c91cc INC %RSI |
(3355) 0x4c91cf CMP %RBX,%RSI |
(3355) 0x4c91d2 JGE 4c90c0 |
(3355) 0x4c91d8 MOV -0xf0(%RBP),%RAX |
(3355) 0x4c91df MOV (%RAX,%RSI,8),%RAX |
(3355) 0x4c91e3 MOV -0xf8(%RBP),%RCX |
(3355) 0x4c91ea MOV (%RCX,%RAX,8),%RCX |
(3355) 0x4c91ee MOV -0x58(%RBP),%RAX |
(3355) 0x4c91f2 LEA (%RCX,%RAX,1),%RDX |
(3355) 0x4c91f6 MOV (%RDI,%RDX,8),%RAX |
(3355) 0x4c91fa VMULSD (%R10,%RSI,8),%XMM0,%XMM1 |
(3355) 0x4c9200 CMP -0x30(%RBP),%RAX |
(3355) 0x4c9204 JGE 4c91c0 |
(3355) 0x4c9206 MOV %R11,(%RDI,%RDX,8) |
(3355) 0x4c920a VMOVSD %XMM1,(%R14,%R11,8) |
(3355) 0x4c9210 MOV -0x90(%RBP),%RAX |
(3355) 0x4c9217 MOV %RCX,(%RAX,%R11,8) |
(3355) 0x4c921b INC %R11 |
(3355) 0x4c921e MOV -0x70(%RBP),%RAX |
(3355) 0x4c9222 MOV 0x8(%RAX,%R9,8),%RBX |
(3355) 0x4c9227 INC %RSI |
(3355) 0x4c922a CMP %RBX,%RSI |
(3355) 0x4c922d JL 4c91d8 |
(3354) 0x4c922f JMP 4c90c0 |
0x4c9234 NOPW %CS:(%RAX,%RAX,1) |
0x4c9240 ADD $0xf8,%RSP |
0x4c9247 POP %RBX |
0x4c9248 POP %R12 |
0x4c924a POP %R13 |
0x4c924c POP %R14 |
0x4c924e POP %R15 |
0x4c9250 POP %RBP |
0x4c9251 JMP 4f3eb0 |
0x4c9256 NOPW %CS:(%RAX,%RAX,1) |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►100.00+ | __kmp_invoke_microtask | libiomp5.so | |
○ | __kmp_fork_call | libiomp5.so | |
○ | __kmpc_fork_call | libiomp5.so | |
○ | hypre_ParMatmul | par_csr_matop.c:829 | exec |
○ | hypre_BoomerAMGSetup | par_amg_setup.c:1226 | exec |
○ | hypre_PCGSetup | pcg.c:234 | exec |
○ | main | amg.c:398 | exec |
○ | __libc_init_first | libc.so.6 |
Path / |
Source file and lines | par_csr_matop.c:829-995 |
Module | exec |
nb instructions | 143 |
nb uops | 212 |
loop length | 686 |
used x86 registers | 14 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 56 |
micro-operation queue | 53.00 cycles |
front end | 53.00 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | |
---|---|---|---|---|---|---|---|---|
uops | 24.50 | 24.50 | 28.67 | 28.67 | 40.00 | 24.50 | 24.50 | 28.67 |
cycles | 24.50 | 24.50 | 28.67 | 28.67 | 40.00 | 24.50 | 24.50 | 28.67 |
Cycles executing div or sqrt instructions | 30.00-96.00 |
FE+BE cycles | 43.48-96.65 |
Stall cycles | 6.03-59.20 |
ROB full (events) | 0.03-50.21 |
SB full (events) | 7.52-13.99 |
Front-end | 53.00 |
Dispatch | 40.00 |
DIV/SQRT | 30.00-96.00 |
Overall L1 | 53.00-96.00 |
all | 0% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 0% |
all | 12% |
load | 12% |
store | 12% |
mul | 12% |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 9% |
other | 11% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
PUSH %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
SUB $0xf8,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %R9,-0xc0(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %R8,-0xe0(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RCX,-0x68(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RDX,-0xd8(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0xe0(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0xa0(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0xd8(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0xb0(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0xd0(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x80(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0xc8(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x108(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0xc0(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0xa8(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0xb8(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x78(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0xb0(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x100(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0xa8(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x40(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0xa0(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x90(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x98(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x30(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x90(%RBP),%R14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x88(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x50(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x80(%RBP),%R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x78(%RBP),%R13 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x70(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0xf8(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x68(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0xd0(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x60(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x58(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x58(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0xf0(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x50(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x70(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x48(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0xe8(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x40(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x118(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x38(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x88(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x30(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x110(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x28(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x98(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x20(%RBP),%R15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x18(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0xb8(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x10(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x60(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
CALL 4f6100 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
MOV %RAX,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CALL 4f60f0 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
MOV %RAX,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
OR %R15,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
SHR $0x20,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
JE 4c8e00 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV %R15,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CQTO | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
IDIV %RCX | 57 | 14.25 | 14.25 | 0 | 0 | 0 | 14.25 | 14.25 | 0 | 42-95 | 24-90 |
MOV %RAX,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 4c8e09 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %R15D,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
DIV %ECX | 10 | 2.50 | 2.50 | 0 | 0 | 0 | 2.50 | 2.50 | 0 | 26 | 6 |
MOV %EAX,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
LEA 0x1(%RBX),%RAX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RSI,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
IMUL %RAX,%R15 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA 0x1(%RSI),%RCX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
IMUL %RBX,%RCX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
IMUL %RBX,%RSI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %RDX,%RSI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CMP %RDX,%RBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CMOVL %RCX,%RSI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
CMOVL %RAX,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
MOV %RDX,-0x48(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV (%R12),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV (%RAX,%RSI,8),%R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV -0x30(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV (%RAX),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV (%RAX,%RSI,8),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x30(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV -0x40(%RBP),%R8 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV (%R8),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV -0x58(%RBP),%RBX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RBX,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
OR %RAX,%RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV $0,%EDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 4c8e85 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
ADD %RBX,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %RSI,-0x38(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV $0x8,%ESI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %RAX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CALL 4f3da0 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
MOV -0x40(%RBP),%R8 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV -0x38(%RBP),%RSI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV (%R8),%RCX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
ADD %R15,-0x48(%RBP) | 2 | 0.25 | 0.25 | 0.83 | 0.83 | 1 | 0.25 | 0.25 | 0.33 | 5 | 1 |
ADD %RBX,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JLE 4c8ea6 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CMP -0x48(%RBP),%RSI | 1 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
JGE 4c9240 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV %R12,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 4c8ed1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
ADD $0xf8,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R13 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %RBP | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
JMP 4f3eb0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
Source file and lines | par_csr_matop.c:829-995 |
Module | exec |
nb instructions | 143 |
nb uops | 212 |
loop length | 686 |
used x86 registers | 14 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 56 |
micro-operation queue | 53.00 cycles |
front end | 53.00 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | |
---|---|---|---|---|---|---|---|---|
uops | 24.50 | 24.50 | 28.67 | 28.67 | 40.00 | 24.50 | 24.50 | 28.67 |
cycles | 24.50 | 24.50 | 28.67 | 28.67 | 40.00 | 24.50 | 24.50 | 28.67 |
Cycles executing div or sqrt instructions | 30.00-96.00 |
FE+BE cycles | 43.48-96.65 |
Stall cycles | 6.03-59.20 |
ROB full (events) | 0.03-50.21 |
SB full (events) | 7.52-13.99 |
Front-end | 53.00 |
Dispatch | 40.00 |
DIV/SQRT | 30.00-96.00 |
Overall L1 | 53.00-96.00 |
all | 0% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 0% |
all | 12% |
load | 12% |
store | 12% |
mul | 12% |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 9% |
other | 11% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
PUSH %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
SUB $0xf8,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %R9,-0xc0(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %R8,-0xe0(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RCX,-0x68(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RDX,-0xd8(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0xe0(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0xa0(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0xd8(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0xb0(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0xd0(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x80(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0xc8(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x108(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0xc0(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0xa8(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0xb8(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x78(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0xb0(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x100(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0xa8(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x40(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0xa0(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x90(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x98(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x30(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x90(%RBP),%R14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x88(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x50(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x80(%RBP),%R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x78(%RBP),%R13 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x70(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0xf8(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x68(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0xd0(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x60(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x58(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x58(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0xf0(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x50(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x70(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x48(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0xe8(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x40(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x118(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x38(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x88(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x30(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x110(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x28(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x98(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x20(%RBP),%R15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x18(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0xb8(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x10(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x60(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
CALL 4f6100 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
MOV %RAX,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CALL 4f60f0 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
MOV %RAX,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
OR %R15,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
SHR $0x20,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
JE 4c8e00 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV %R15,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CQTO | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
IDIV %RCX | 57 | 14.25 | 14.25 | 0 | 0 | 0 | 14.25 | 14.25 | 0 | 42-95 | 24-90 |
MOV %RAX,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 4c8e09 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %R15D,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
DIV %ECX | 10 | 2.50 | 2.50 | 0 | 0 | 0 | 2.50 | 2.50 | 0 | 26 | 6 |
MOV %EAX,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
LEA 0x1(%RBX),%RAX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RSI,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
IMUL %RAX,%R15 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA 0x1(%RSI),%RCX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
IMUL %RBX,%RCX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
IMUL %RBX,%RSI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %RDX,%RSI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CMP %RDX,%RBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CMOVL %RCX,%RSI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
CMOVL %RAX,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
MOV %RDX,-0x48(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV (%R12),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV (%RAX,%RSI,8),%R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV -0x30(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV (%RAX),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV (%RAX,%RSI,8),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x30(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV -0x40(%RBP),%R8 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV (%R8),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV -0x58(%RBP),%RBX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RBX,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
OR %RAX,%RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV $0,%EDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 4c8e85 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
ADD %RBX,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %RSI,-0x38(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV $0x8,%ESI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %RAX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CALL 4f3da0 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
MOV -0x40(%RBP),%R8 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV -0x38(%RBP),%RSI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV (%R8),%RCX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
ADD %R15,-0x48(%RBP) | 2 | 0.25 | 0.25 | 0.83 | 0.83 | 1 | 0.25 | 0.25 | 0.33 | 5 | 1 |
ADD %RBX,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JLE 4c8ea6 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CMP -0x48(%RBP),%RSI | 1 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
JGE 4c9240 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV %R12,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 4c8ed1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
ADD $0xf8,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R13 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %RBP | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
JMP 4f3eb0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼hypre_ParMatmul.extracted.12– | 4.04 | 1.53 |
▼Loop 3353 - par_csr_matop.c:865-989 - exec– | 0.08 | 0.03 |
▼Loop 3354 - par_csr_matop.c:937-989 - exec– | 1.03 | 0.39 |
○Loop 3356 - par_csr_matop.c:946-965 - exec | 2.93 | 1.11 |
○Loop 3355 - par_csr_matop.c:970-989 - exec | 0 | 0 |
▼Loop 3357 - par_csr_matop.c:888-928 - exec– | 0 | 0 |
○Loop 3358 - par_csr_matop.c:917-928 - exec | 0 | 0 |
○Loop 3359 - par_csr_matop.c:897-915 - exec | 0 | 0 |
○Loop 3360 - par_csr_matop.c:858-859 - exec | 0 | 0 |