Loop Id: 5060 | Module: exec | Source: par_csr_matop.c:109-231 [...] | Coverage: 0.37% |
---|
Loop Id: 5060 | Module: exec | Source: par_csr_matop.c:109-231 [...] | Coverage: 0.37% |
---|
0x539db0 MOV -0x48(%RBP),%R10 |
0x539db4 ADD $0x2,%R10 |
0x539db8 CMP -0x50(%RBP),%R10 |
0x539dbc JE 53a47d |
0x539dc2 MOV -0x60(%RBP),%RCX |
0x539dc6 MOV %R10,-0x48(%RBP) |
0x539dca LEA (%RCX,%R10,1),%R13 |
0x539dce MOV -0x68(%RBP),%RCX |
0x539dd2 MOV (%RCX,%R13,8),%R8 |
0x539dd6 MOV 0x20(%RBP),%RCX |
0x539dda MOV (%RCX,%R8,8),%R10 |
0x539dde MOV 0x8(%RCX,%R8,8),%R14 |
0x539de3 MOV %R14,%RCX |
0x539de6 SUB %R10,%RCX |
0x539de9 JLE 539f54 |
0x539def MOV %R13,-0x80(%RBP) |
0x539df3 CMP $0x8,%RCX |
0x539df7 JAE 539e40 |
0x539df9 MOV %RCX,%RDX |
0x539dfc AND $-0x8,%RDX |
0x539e00 CMP %RCX,%RDX |
0x539e03 JAE 539f50 |
0x539e09 ADD %RDX,%R10 |
0x539e0c MOV 0x28(%RBP),%RDX |
0x539e10 MOV -0x80(%RBP),%R13 |
0x539e14 JMP 539e2c |
(5067) 0x539e20 INC %R10 |
(5067) 0x539e23 CMP %R10,%R14 |
(5067) 0x539e26 JE 539f54 |
(5067) 0x539e2c MOV (%RDX,%R10,8),%RCX |
(5067) 0x539e30 CMP %R15,(%RBX,%RCX,8) |
(5067) 0x539e34 JGE 539e20 |
(5067) 0x539e36 MOV %RAX,(%RBX,%RCX,8) |
(5067) 0x539e3a INC %RAX |
(5067) 0x539e3d JMP 539e20 |
0x539e40 MOV -0x58(%RBP),%RDX |
0x539e44 MOV %RCX,%RSI |
0x539e47 SHR $0x3,%RSI |
0x539e4b LEA (%RDX,%R10,8),%RDX |
0x539e4f JMP 539e69 |
(5068) 0x539e60 ADD $0x40,%RDX |
(5068) 0x539e64 DEC %RSI |
(5068) 0x539e67 JE 539df9 |
(5068) 0x539e69 MOV -0x38(%RDX),%R13 |
(5068) 0x539e6d CMP %R15,(%RBX,%R13,8) |
(5068) 0x539e71 JGE 539ee0 |
(5068) 0x539e73 MOV %RAX,(%RBX,%R13,8) |
(5068) 0x539e77 INC %RAX |
(5068) 0x539e7a MOV -0x30(%RDX),%R13 |
(5068) 0x539e7e CMP %R15,(%RBX,%R13,8) |
(5068) 0x539e82 JL 539eea |
(5068) 0x539e84 MOV -0x28(%RDX),%R13 |
(5068) 0x539e88 CMP %R15,(%RBX,%R13,8) |
(5068) 0x539e8c JGE 539efb |
(5068) 0x539e8e MOV %RAX,(%RBX,%R13,8) |
(5068) 0x539e92 INC %RAX |
(5068) 0x539e95 MOV -0x20(%RDX),%R13 |
(5068) 0x539e99 CMP %R15,(%RBX,%R13,8) |
(5068) 0x539e9d JL 539f05 |
(5068) 0x539e9f MOV -0x18(%RDX),%R13 |
(5068) 0x539ea3 CMP %R15,(%RBX,%R13,8) |
(5068) 0x539ea7 JGE 539f16 |
(5068) 0x539ea9 MOV %RAX,(%RBX,%R13,8) |
(5068) 0x539ead INC %RAX |
(5068) 0x539eb0 MOV -0x10(%RDX),%R13 |
(5068) 0x539eb4 CMP %R15,(%RBX,%R13,8) |
(5068) 0x539eb8 JL 539f20 |
(5068) 0x539eba MOV -0x8(%RDX),%R13 |
(5068) 0x539ebe CMP %R15,(%RBX,%R13,8) |
(5068) 0x539ec2 JGE 539f31 |
(5068) 0x539ec4 MOV %RAX,(%RBX,%R13,8) |
(5068) 0x539ec8 INC %RAX |
(5068) 0x539ecb MOV (%RDX),%R13 |
(5068) 0x539ece CMP %R15,(%RBX,%R13,8) |
(5068) 0x539ed2 JGE 539e60 |
(5068) 0x539ed4 JMP 539f3e |
(5068) 0x539ee0 MOV -0x30(%RDX),%R13 |
(5068) 0x539ee4 CMP %R15,(%RBX,%R13,8) |
(5068) 0x539ee8 JGE 539e84 |
(5068) 0x539eea MOV %RAX,(%RBX,%R13,8) |
(5068) 0x539eee INC %RAX |
(5068) 0x539ef1 MOV -0x28(%RDX),%R13 |
(5068) 0x539ef5 CMP %R15,(%RBX,%R13,8) |
(5068) 0x539ef9 JL 539e8e |
(5068) 0x539efb MOV -0x20(%RDX),%R13 |
(5068) 0x539eff CMP %R15,(%RBX,%R13,8) |
(5068) 0x539f03 JGE 539e9f |
(5068) 0x539f05 MOV %RAX,(%RBX,%R13,8) |
(5068) 0x539f09 INC %RAX |
(5068) 0x539f0c MOV -0x18(%RDX),%R13 |
(5068) 0x539f10 CMP %R15,(%RBX,%R13,8) |
(5068) 0x539f14 JL 539ea9 |
(5068) 0x539f16 MOV -0x10(%RDX),%R13 |
(5068) 0x539f1a CMP %R15,(%RBX,%R13,8) |
(5068) 0x539f1e JGE 539eba |
(5068) 0x539f20 MOV %RAX,(%RBX,%R13,8) |
(5068) 0x539f24 INC %RAX |
(5068) 0x539f27 MOV -0x8(%RDX),%R13 |
(5068) 0x539f2b CMP %R15,(%RBX,%R13,8) |
(5068) 0x539f2f JL 539ec4 |
(5068) 0x539f31 MOV (%RDX),%R13 |
(5068) 0x539f34 CMP %R15,(%RBX,%R13,8) |
(5068) 0x539f38 JGE 539e60 |
(5068) 0x539f3e MOV %RAX,(%RBX,%R13,8) |
(5068) 0x539f42 INC %RAX |
(5068) 0x539f45 JMP 539e60 |
0x539f50 MOV -0x80(%RBP),%R13 |
0x539f54 MOV 0x30(%RBP),%RDX |
0x539f58 MOV (%RDX,%R8,8),%RCX |
0x539f5c MOV 0x8(%RDX,%R8,8),%RSI |
0x539f61 MOV %RSI,%RDX |
0x539f64 SUB %RCX,%RDX |
0x539f67 JLE 539fc0 |
0x539f69 CMP $0x4,%RDX |
0x539f6d JAE 53a090 |
0x539f73 MOV %RDX,%R8 |
0x539f76 AND $-0x4,%R8 |
0x539f7a CMP %RDX,%R8 |
0x539f7d JAE 539fc0 |
0x539f7f ADD %R8,%RCX |
0x539f82 MOV 0x38(%RBP),%R8 |
0x539f86 JMP 539f98 |
(5065) 0x539f90 INC %RCX |
(5065) 0x539f93 CMP %RCX,%RSI |
(5065) 0x539f96 JE 539fc0 |
(5065) 0x539f98 MOV (%R8,%RCX,8),%RDX |
(5065) 0x539f9c MOV (%RDI,%RDX,8),%RDX |
(5065) 0x539fa0 ADD %R12,%RDX |
(5065) 0x539fa3 CMP %R11,(%RBX,%RDX,8) |
(5065) 0x539fa7 JGE 539f90 |
(5065) 0x539fa9 MOV %R9,(%RBX,%RDX,8) |
(5065) 0x539fad INC %R9 |
(5065) 0x539fb0 JMP 539f90 |
0x539fc0 MOV -0x68(%RBP),%RCX |
0x539fc4 MOV 0x8(%RCX,%R13,8),%RDX |
0x539fc9 MOV 0x20(%RBP),%RCX |
0x539fcd MOV (%RCX,%RDX,8),%R8 |
0x539fd1 MOV 0x8(%RCX,%RDX,8),%R10 |
0x539fd6 MOV %R10,%RCX |
0x539fd9 SUB %R8,%RCX |
0x539fdc JLE 53a020 |
0x539fde CMP $0x8,%RCX |
0x539fe2 JAE 53a120 |
0x539fe8 MOV %RCX,%RSI |
0x539feb AND $-0x8,%RSI |
0x539fef CMP %RCX,%RSI |
0x539ff2 JAE 53a020 |
0x539ff4 ADD %RSI,%R8 |
0x539ff7 MOV 0x28(%RBP),%RSI |
0x539ffb JMP 53a008 |
(5063) 0x53a000 INC %R8 |
(5063) 0x53a003 CMP %R8,%R10 |
(5063) 0x53a006 JE 53a020 |
(5063) 0x53a008 MOV (%RSI,%R8,8),%RCX |
(5063) 0x53a00c CMP %R15,(%RBX,%RCX,8) |
(5063) 0x53a010 JGE 53a000 |
(5063) 0x53a012 MOV %RAX,(%RBX,%RCX,8) |
(5063) 0x53a016 INC %RAX |
(5063) 0x53a019 JMP 53a000 |
0x53a020 MOV 0x30(%RBP),%RSI |
0x53a024 MOV (%RSI,%RDX,8),%RCX |
0x53a028 MOV 0x8(%RSI,%RDX,8),%RDX |
0x53a02d MOV %RDX,%RSI |
0x53a030 SUB %RCX,%RSI |
0x53a033 JLE 539db0 |
0x53a039 CMP $0x4,%RSI |
0x53a03d JAE 53a230 |
0x53a043 MOV %RSI,%R8 |
0x53a046 AND $-0x4,%R8 |
0x53a04a CMP %RSI,%R8 |
0x53a04d JAE 539db0 |
0x53a053 ADD %R8,%RCX |
0x53a056 MOV 0x38(%RBP),%R8 |
0x53a05a MOV -0x48(%RBP),%R10 |
0x53a05e JMP 53a06c |
(5061) 0x53a060 INC %RCX |
(5061) 0x53a063 CMP %RCX,%RDX |
(5061) 0x53a066 JE 539db4 |
(5061) 0x53a06c MOV (%R8,%RCX,8),%RSI |
(5061) 0x53a070 MOV (%RDI,%RSI,8),%RSI |
(5061) 0x53a074 ADD %R12,%RSI |
(5061) 0x53a077 CMP %R11,(%RBX,%RSI,8) |
(5061) 0x53a07b JGE 53a060 |
(5061) 0x53a07d MOV %R9,(%RBX,%RSI,8) |
(5061) 0x53a081 INC %R9 |
(5061) 0x53a084 JMP 53a060 |
0x53a090 MOV -0xc0(%RBP),%R10 |
0x53a097 MOV %RDX,%R8 |
0x53a09a SHR $0x2,%R8 |
0x53a09e LEA (%R10,%RCX,8),%R10 |
0x53a0a2 JMP 53a0bd |
(5066) 0x53a0b0 ADD $0x20,%R10 |
(5066) 0x53a0b4 DEC %R8 |
(5066) 0x53a0b7 JE 539f73 |
(5066) 0x53a0bd MOV -0x18(%R10),%R14 |
(5066) 0x53a0c1 MOV (%RDI,%R14,8),%R14 |
(5066) 0x53a0c5 ADD %R12,%R14 |
(5066) 0x53a0c8 CMP %R11,(%RBX,%R14,8) |
(5066) 0x53a0cc JGE 53a0d5 |
(5066) 0x53a0ce MOV %R9,(%RBX,%R14,8) |
(5066) 0x53a0d2 INC %R9 |
(5066) 0x53a0d5 MOV -0x10(%R10),%R14 |
(5066) 0x53a0d9 MOV (%RDI,%R14,8),%R14 |
(5066) 0x53a0dd ADD %R12,%R14 |
(5066) 0x53a0e0 CMP %R11,(%RBX,%R14,8) |
(5066) 0x53a0e4 JGE 53a0ed |
(5066) 0x53a0e6 MOV %R9,(%RBX,%R14,8) |
(5066) 0x53a0ea INC %R9 |
(5066) 0x53a0ed MOV -0x8(%R10),%R14 |
(5066) 0x53a0f1 MOV (%RDI,%R14,8),%R14 |
(5066) 0x53a0f5 ADD %R12,%R14 |
(5066) 0x53a0f8 CMP %R11,(%RBX,%R14,8) |
(5066) 0x53a0fc JGE 53a105 |
(5066) 0x53a0fe MOV %R9,(%RBX,%R14,8) |
(5066) 0x53a102 INC %R9 |
(5066) 0x53a105 MOV (%R10),%R14 |
(5066) 0x53a108 MOV (%RDI,%R14,8),%R14 |
(5066) 0x53a10c ADD %R12,%R14 |
(5066) 0x53a10f CMP %R11,(%RBX,%R14,8) |
(5066) 0x53a113 JGE 53a0b0 |
(5066) 0x53a115 MOV %R9,(%RBX,%R14,8) |
(5066) 0x53a119 INC %R9 |
(5066) 0x53a11c JMP 53a0b0 |
0x53a120 MOV -0x58(%RBP),%R14 |
0x53a124 MOV %RCX,%RSI |
0x53a127 SHR $0x3,%RSI |
0x53a12b LEA (%R14,%R8,8),%R14 |
0x53a12f JMP 53a14d |
(5064) 0x53a140 ADD $0x40,%R14 |
(5064) 0x53a144 DEC %RSI |
(5064) 0x53a147 JE 539fe8 |
(5064) 0x53a14d MOV -0x38(%R14),%R13 |
(5064) 0x53a151 CMP %R15,(%RBX,%R13,8) |
(5064) 0x53a155 JGE 53a1c0 |
(5064) 0x53a157 MOV %RAX,(%RBX,%R13,8) |
(5064) 0x53a15b INC %RAX |
(5064) 0x53a15e MOV -0x30(%R14),%R13 |
(5064) 0x53a162 CMP %R15,(%RBX,%R13,8) |
(5064) 0x53a166 JL 53a1ca |
(5064) 0x53a168 MOV -0x28(%R14),%R13 |
(5064) 0x53a16c CMP %R15,(%RBX,%R13,8) |
(5064) 0x53a170 JGE 53a1db |
(5064) 0x53a172 MOV %RAX,(%RBX,%R13,8) |
(5064) 0x53a176 INC %RAX |
(5064) 0x53a179 MOV -0x20(%R14),%R13 |
(5064) 0x53a17d CMP %R15,(%RBX,%R13,8) |
(5064) 0x53a181 JL 53a1e5 |
(5064) 0x53a183 MOV -0x18(%R14),%R13 |
(5064) 0x53a187 CMP %R15,(%RBX,%R13,8) |
(5064) 0x53a18b JGE 53a1f6 |
(5064) 0x53a18d MOV %RAX,(%RBX,%R13,8) |
(5064) 0x53a191 INC %RAX |
(5064) 0x53a194 MOV -0x10(%R14),%R13 |
(5064) 0x53a198 CMP %R15,(%RBX,%R13,8) |
(5064) 0x53a19c JL 53a200 |
(5064) 0x53a19e MOV -0x8(%R14),%R13 |
(5064) 0x53a1a2 CMP %R15,(%RBX,%R13,8) |
(5064) 0x53a1a6 JGE 53a211 |
(5064) 0x53a1a8 MOV %RAX,(%RBX,%R13,8) |
(5064) 0x53a1ac INC %RAX |
(5064) 0x53a1af MOV (%R14),%R13 |
(5064) 0x53a1b2 CMP %R15,(%RBX,%R13,8) |
(5064) 0x53a1b6 JGE 53a140 |
(5064) 0x53a1b8 JMP 53a21e |
(5064) 0x53a1c0 MOV -0x30(%R14),%R13 |
(5064) 0x53a1c4 CMP %R15,(%RBX,%R13,8) |
(5064) 0x53a1c8 JGE 53a168 |
(5064) 0x53a1ca MOV %RAX,(%RBX,%R13,8) |
(5064) 0x53a1ce INC %RAX |
(5064) 0x53a1d1 MOV -0x28(%R14),%R13 |
(5064) 0x53a1d5 CMP %R15,(%RBX,%R13,8) |
(5064) 0x53a1d9 JL 53a172 |
(5064) 0x53a1db MOV -0x20(%R14),%R13 |
(5064) 0x53a1df CMP %R15,(%RBX,%R13,8) |
(5064) 0x53a1e3 JGE 53a183 |
(5064) 0x53a1e5 MOV %RAX,(%RBX,%R13,8) |
(5064) 0x53a1e9 INC %RAX |
(5064) 0x53a1ec MOV -0x18(%R14),%R13 |
(5064) 0x53a1f0 CMP %R15,(%RBX,%R13,8) |
(5064) 0x53a1f4 JL 53a18d |
(5064) 0x53a1f6 MOV -0x10(%R14),%R13 |
(5064) 0x53a1fa CMP %R15,(%RBX,%R13,8) |
(5064) 0x53a1fe JGE 53a19e |
(5064) 0x53a200 MOV %RAX,(%RBX,%R13,8) |
(5064) 0x53a204 INC %RAX |
(5064) 0x53a207 MOV -0x8(%R14),%R13 |
(5064) 0x53a20b CMP %R15,(%RBX,%R13,8) |
(5064) 0x53a20f JL 53a1a8 |
(5064) 0x53a211 MOV (%R14),%R13 |
(5064) 0x53a214 CMP %R15,(%RBX,%R13,8) |
(5064) 0x53a218 JGE 53a140 |
(5064) 0x53a21e MOV %RAX,(%RBX,%R13,8) |
(5064) 0x53a222 INC %RAX |
(5064) 0x53a225 JMP 53a140 |
0x53a230 MOV -0xc0(%RBP),%R10 |
0x53a237 MOV %RSI,%R8 |
0x53a23a SHR $0x2,%R8 |
0x53a23e LEA (%R10,%RCX,8),%R10 |
0x53a242 JMP 53a25d |
(5062) 0x53a250 ADD $0x20,%R10 |
(5062) 0x53a254 DEC %R8 |
(5062) 0x53a257 JE 53a043 |
(5062) 0x53a25d MOV -0x18(%R10),%R14 |
(5062) 0x53a261 MOV (%RDI,%R14,8),%R14 |
(5062) 0x53a265 ADD %R12,%R14 |
(5062) 0x53a268 CMP %R11,(%RBX,%R14,8) |
(5062) 0x53a26c JGE 53a275 |
(5062) 0x53a26e MOV %R9,(%RBX,%R14,8) |
(5062) 0x53a272 INC %R9 |
(5062) 0x53a275 MOV -0x10(%R10),%R14 |
(5062) 0x53a279 MOV (%RDI,%R14,8),%R14 |
(5062) 0x53a27d ADD %R12,%R14 |
(5062) 0x53a280 CMP %R11,(%RBX,%R14,8) |
(5062) 0x53a284 JGE 53a28d |
(5062) 0x53a286 MOV %R9,(%RBX,%R14,8) |
(5062) 0x53a28a INC %R9 |
(5062) 0x53a28d MOV -0x8(%R10),%R14 |
(5062) 0x53a291 MOV (%RDI,%R14,8),%R14 |
(5062) 0x53a295 ADD %R12,%R14 |
(5062) 0x53a298 CMP %R11,(%RBX,%R14,8) |
(5062) 0x53a29c JGE 53a2a5 |
(5062) 0x53a29e MOV %R9,(%RBX,%R14,8) |
(5062) 0x53a2a2 INC %R9 |
(5062) 0x53a2a5 MOV (%R10),%R14 |
(5062) 0x53a2a8 MOV (%RDI,%R14,8),%R14 |
(5062) 0x53a2ac ADD %R12,%R14 |
(5062) 0x53a2af CMP %R11,(%RBX,%R14,8) |
(5062) 0x53a2b3 JGE 53a250 |
(5062) 0x53a2b5 MOV %R9,(%RBX,%R14,8) |
(5062) 0x53a2b9 INC %R9 |
(5062) 0x53a2bc JMP 53a250 |
/home/eoseret/qaas_runs_CPU_9468/172-019-1763/intel/AMG/build/AMG/AMG/parcsr_mv/par_csr_matop.c: 109 - 231 |
-------------------------------------------------------------------------------- |
109: if (ii < rest) |
[...] |
187: for (jj2 = A_diag_i[i1]; jj2 < A_diag_i[i1+1]; jj2++) |
188: { |
189: i2 = A_diag_j[jj2]; |
[...] |
195: for (jj3 = B_diag_i[i2]; jj3 < B_diag_i[i2+1]; jj3++) |
196: { |
197: i3 = B_diag_j[jj3]; |
[...] |
205: if (B_marker[i3] < jj_row_begin_diag) |
206: { |
207: B_marker[i3] = jj_count_diag; |
208: jj_count_diag++; |
[...] |
218: for (jj3 = B_offd_i[i2]; jj3 < B_offd_i[i2+1]; jj3++) |
219: { |
220: i3 = num_cols_diag_B+map_B_to_C[B_offd_j[jj3]]; |
[...] |
228: if (B_marker[i3] < jj_row_begin_offd) |
229: { |
230: B_marker[i3] = jj_count_offd; |
231: jj_count_offd++; |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►100.00+ | __kmp_invoke_microtask | libiomp5.so | |
○ | __kmp_invoke_task_func | libiomp5.so |
Path / |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.00 |
CQA speedup if FP arith vectorized | 1.00 |
CQA speedup if fully vectorized | 6.13 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.48 |
Bottlenecks | micro-operation queue |
Function | hypre_ParMatmul_RowSizes.extracted |
Source | par_csr_matop.c:109-109,par_csr_matop.c:187-189,par_csr_matop.c:195-195,par_csr_matop.c:208-208,par_csr_matop.c:218-218,par_csr_matop.c:231-231 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 15.83 |
CQA cycles if no scalar integer | 15.83 |
CQA cycles if FP arith vectorized | 15.83 |
CQA cycles if fully vectorized | 2.58 |
Front-end cycles | 15.83 |
DIV/SQRT cycles | 10.50 |
P0 cycles | 10.50 |
P1 cycles | 10.25 |
P2 cycles | 10.25 |
P3 cycles | 10.50 |
P4 cycles | 10.67 |
P5 cycles | 10.67 |
P6 cycles | 10.67 |
P7 cycles | 0.00 |
P8 cycles | 0.00 |
P9 cycles | 0.00 |
P10 cycles | 0.00 |
P11 cycles | 0.00 |
P12 cycles | 0.00 |
P13 cycles | 0.00 |
Inter-iter dependencies cycles | NA |
FE+BE cycles (UFS) | NA |
Stall cycles (UFS) | NA |
Nb insns | 95.00 |
Nb uops | 95.00 |
Nb loads | 30.00 |
Nb stores | 2.00 |
Nb stack references | 11.00 |
FLOP/cycle | 0.00 |
Nb FLOP add-sub | 0.00 |
Nb FLOP mul | 0.00 |
Nb FLOP fma | 0.00 |
Nb FLOP div | 0.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 16.17 |
Bytes prefetched | 0.00 |
Bytes loaded | 240.00 |
Bytes stored | 16.00 |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | 0.00 |
Vectorization ratio load | 0.00 |
Vectorization ratio store | 0.00 |
Vectorization ratio mul | NA |
Vectorization ratio add_sub | NA |
Vectorization ratio fma | NA |
Vectorization ratio div_sqrt | NA |
Vectorization ratio other | 0.00 |
Vector-efficiency ratio all | 12.50 |
Vector-efficiency ratio load | 12.50 |
Vector-efficiency ratio store | 12.50 |
Vector-efficiency ratio mul | NA |
Vector-efficiency ratio add_sub | NA |
Vector-efficiency ratio fma | NA |
Vector-efficiency ratio div_sqrt | NA |
Vector-efficiency ratio other | 12.50 |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.00 |
CQA speedup if FP arith vectorized | 1.00 |
CQA speedup if fully vectorized | 6.13 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.48 |
Bottlenecks | micro-operation queue |
Function | hypre_ParMatmul_RowSizes.extracted |
Source | par_csr_matop.c:109-109,par_csr_matop.c:187-189,par_csr_matop.c:195-195,par_csr_matop.c:208-208,par_csr_matop.c:218-218,par_csr_matop.c:231-231 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 15.83 |
CQA cycles if no scalar integer | 15.83 |
CQA cycles if FP arith vectorized | 15.83 |
CQA cycles if fully vectorized | 2.58 |
Front-end cycles | 15.83 |
DIV/SQRT cycles | 10.50 |
P0 cycles | 10.50 |
P1 cycles | 10.25 |
P2 cycles | 10.25 |
P3 cycles | 10.50 |
P4 cycles | 10.67 |
P5 cycles | 10.67 |
P6 cycles | 10.67 |
P7 cycles | 0.00 |
P8 cycles | 0.00 |
P9 cycles | 0.00 |
P10 cycles | 0.00 |
P11 cycles | 0.00 |
P12 cycles | 0.00 |
P13 cycles | 0.00 |
Inter-iter dependencies cycles | NA |
FE+BE cycles (UFS) | NA |
Stall cycles (UFS) | NA |
Nb insns | 95.00 |
Nb uops | 95.00 |
Nb loads | 30.00 |
Nb stores | 2.00 |
Nb stack references | 11.00 |
FLOP/cycle | 0.00 |
Nb FLOP add-sub | 0.00 |
Nb FLOP mul | 0.00 |
Nb FLOP fma | 0.00 |
Nb FLOP div | 0.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 16.17 |
Bytes prefetched | 0.00 |
Bytes loaded | 240.00 |
Bytes stored | 16.00 |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | 0.00 |
Vectorization ratio load | 0.00 |
Vectorization ratio store | 0.00 |
Vectorization ratio mul | NA |
Vectorization ratio add_sub | NA |
Vectorization ratio fma | NA |
Vectorization ratio div_sqrt | NA |
Vectorization ratio other | 0.00 |
Vector-efficiency ratio all | 12.50 |
Vector-efficiency ratio load | 12.50 |
Vector-efficiency ratio store | 12.50 |
Vector-efficiency ratio mul | NA |
Vector-efficiency ratio add_sub | NA |
Vector-efficiency ratio fma | NA |
Vector-efficiency ratio div_sqrt | NA |
Vector-efficiency ratio other | 12.50 |
Path / |
Function | hypre_ParMatmul_RowSizes.extracted |
Source file and lines | par_csr_matop.c:109-231 |
Module | exec |
nb instructions | 95 |
nb uops | 95 |
loop length | 357 |
used x86 registers | 8 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 11 |
micro-operation queue | 15.83 cycles |
front end | 15.83 cycles |
| | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 10.50 | 10.50 | 10.25 | 10.25 | 10.50 | 10.67 | 10.67 | 10.67 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
cycles | 10.50 | 10.50 | 10.25 | 10.25 | 10.50 | 10.67 | 10.67 | 10.67 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | NA |
Front-end | 15.83 |
Dispatch | 10.67 |
Overall L1 | 15.83 |
Vectorization ratio |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
Vector-efficiency ratio |
all | 12% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput | Vectorization |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
MOV -0x48(%RBP),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
ADD $0x2,%R10 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
CMP -0x50(%RBP),%R10 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
JE 53a47d <hypre_ParMatmul_RowSizes.extracted+0x113d> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV -0x60(%RBP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV %R10,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
LEA (%RCX,%R10,1),%R13 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV -0x68(%RBP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV (%RCX,%R13,8),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x20(%RBP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV (%RCX,%R8,8),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x8(%RCX,%R8,8),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV %R14,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
SUB %R10,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
JLE 539f54 <hypre_ParMatmul_RowSizes.extracted+0xc14> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV %R13,-0x80(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
CMP $0x8,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JAE 539e40 <hypre_ParMatmul_RowSizes.extracted+0xb00> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV %RCX,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
AND $-0x8,%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
CMP %RCX,%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JAE 539f50 <hypre_ParMatmul_RowSizes.extracted+0xc10> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
ADD %RDX,%R10 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV 0x28(%RBP),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV -0x80(%RBP),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
JMP 539e2c <hypre_ParMatmul_RowSizes.extracted+0xaec> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
MOV -0x58(%RBP),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV %RCX,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
SHR $0x3,%RSI | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
LEA (%RDX,%R10,8),%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
JMP 539e69 <hypre_ParMatmul_RowSizes.extracted+0xb29> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
MOV -0x80(%RBP),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x30(%RBP),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV (%RDX,%R8,8),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x8(%RDX,%R8,8),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV %RSI,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
SUB %RCX,%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
JLE 539fc0 <hypre_ParMatmul_RowSizes.extracted+0xc80> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
CMP $0x4,%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JAE 53a090 <hypre_ParMatmul_RowSizes.extracted+0xd50> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV %RDX,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
AND $-0x4,%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
CMP %RDX,%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JAE 539fc0 <hypre_ParMatmul_RowSizes.extracted+0xc80> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
ADD %R8,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV 0x38(%RBP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
JMP 539f98 <hypre_ParMatmul_RowSizes.extracted+0xc58> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
MOV -0x68(%RBP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x8(%RCX,%R13,8),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x20(%RBP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV (%RCX,%RDX,8),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x8(%RCX,%RDX,8),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV %R10,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
SUB %R8,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
JLE 53a020 <hypre_ParMatmul_RowSizes.extracted+0xce0> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
CMP $0x8,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JAE 53a120 <hypre_ParMatmul_RowSizes.extracted+0xde0> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV %RCX,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
AND $-0x8,%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
CMP %RCX,%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JAE 53a020 <hypre_ParMatmul_RowSizes.extracted+0xce0> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
ADD %RSI,%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV 0x28(%RBP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
JMP 53a008 <hypre_ParMatmul_RowSizes.extracted+0xcc8> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
MOV 0x30(%RBP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV (%RSI,%RDX,8),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x8(%RSI,%RDX,8),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV %RDX,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
SUB %RCX,%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
JLE 539db0 <hypre_ParMatmul_RowSizes.extracted+0xa70> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
CMP $0x4,%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JAE 53a230 <hypre_ParMatmul_RowSizes.extracted+0xef0> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV %RSI,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
AND $-0x4,%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
CMP %RSI,%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JAE 539db0 <hypre_ParMatmul_RowSizes.extracted+0xa70> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
ADD %R8,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV 0x38(%RBP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV -0x48(%RBP),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
JMP 53a06c <hypre_ParMatmul_RowSizes.extracted+0xd2c> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
MOV -0xc0(%RBP),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV %RDX,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
SHR $0x2,%R8 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
LEA (%R10,%RCX,8),%R10 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
JMP 53a0bd <hypre_ParMatmul_RowSizes.extracted+0xd7d> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
MOV -0x58(%RBP),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV %RCX,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
SHR $0x3,%RSI | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
LEA (%R14,%R8,8),%R14 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
JMP 53a14d <hypre_ParMatmul_RowSizes.extracted+0xe0d> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
MOV -0xc0(%RBP),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV %RSI,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
SHR $0x2,%R8 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
LEA (%R10,%RCX,8),%R10 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
JMP 53a25d <hypre_ParMatmul_RowSizes.extracted+0xf1d> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
Function | hypre_ParMatmul_RowSizes.extracted |
Source file and lines | par_csr_matop.c:109-231 |
Module | exec |
nb instructions | 95 |
nb uops | 95 |
loop length | 357 |
used x86 registers | 8 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 11 |
micro-operation queue | 15.83 cycles |
front end | 15.83 cycles |
| | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 10.50 | 10.50 | 10.25 | 10.25 | 10.50 | 10.67 | 10.67 | 10.67 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
cycles | 10.50 | 10.50 | 10.25 | 10.25 | 10.50 | 10.67 | 10.67 | 10.67 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | NA |
Front-end | 15.83 |
Dispatch | 10.67 |
Overall L1 | 15.83 |
Vectorization ratio |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
Vector-efficiency ratio |
all | 12% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 12% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput | Vectorization |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
MOV -0x48(%RBP),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
ADD $0x2,%R10 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
CMP -0x50(%RBP),%R10 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
JE 53a47d <hypre_ParMatmul_RowSizes.extracted+0x113d> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV -0x60(%RBP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV %R10,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
LEA (%RCX,%R10,1),%R13 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV -0x68(%RBP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV (%RCX,%R13,8),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x20(%RBP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV (%RCX,%R8,8),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x8(%RCX,%R8,8),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV %R14,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
SUB %R10,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
JLE 539f54 <hypre_ParMatmul_RowSizes.extracted+0xc14> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV %R13,-0x80(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
CMP $0x8,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JAE 539e40 <hypre_ParMatmul_RowSizes.extracted+0xb00> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV %RCX,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
AND $-0x8,%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
CMP %RCX,%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JAE 539f50 <hypre_ParMatmul_RowSizes.extracted+0xc10> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
ADD %RDX,%R10 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV 0x28(%RBP),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV -0x80(%RBP),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
JMP 539e2c <hypre_ParMatmul_RowSizes.extracted+0xaec> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
MOV -0x58(%RBP),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV %RCX,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
SHR $0x3,%RSI | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
LEA (%RDX,%R10,8),%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
JMP 539e69 <hypre_ParMatmul_RowSizes.extracted+0xb29> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
MOV -0x80(%RBP),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x30(%RBP),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV (%RDX,%R8,8),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x8(%RDX,%R8,8),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV %RSI,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
SUB %RCX,%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
JLE 539fc0 <hypre_ParMatmul_RowSizes.extracted+0xc80> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
CMP $0x4,%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JAE 53a090 <hypre_ParMatmul_RowSizes.extracted+0xd50> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV %RDX,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
AND $-0x4,%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
CMP %RDX,%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JAE 539fc0 <hypre_ParMatmul_RowSizes.extracted+0xc80> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
ADD %R8,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV 0x38(%RBP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
JMP 539f98 <hypre_ParMatmul_RowSizes.extracted+0xc58> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
MOV -0x68(%RBP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x8(%RCX,%R13,8),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x20(%RBP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV (%RCX,%RDX,8),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x8(%RCX,%RDX,8),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV %R10,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
SUB %R8,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
JLE 53a020 <hypre_ParMatmul_RowSizes.extracted+0xce0> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
CMP $0x8,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JAE 53a120 <hypre_ParMatmul_RowSizes.extracted+0xde0> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV %RCX,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
AND $-0x8,%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
CMP %RCX,%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JAE 53a020 <hypre_ParMatmul_RowSizes.extracted+0xce0> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
ADD %RSI,%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV 0x28(%RBP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
JMP 53a008 <hypre_ParMatmul_RowSizes.extracted+0xcc8> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
MOV 0x30(%RBP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV (%RSI,%RDX,8),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x8(%RSI,%RDX,8),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV %RDX,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
SUB %RCX,%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
JLE 539db0 <hypre_ParMatmul_RowSizes.extracted+0xa70> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
CMP $0x4,%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JAE 53a230 <hypre_ParMatmul_RowSizes.extracted+0xef0> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV %RSI,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
AND $-0x4,%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
CMP %RSI,%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JAE 539db0 <hypre_ParMatmul_RowSizes.extracted+0xa70> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
ADD %R8,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV 0x38(%RBP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV -0x48(%RBP),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
JMP 53a06c <hypre_ParMatmul_RowSizes.extracted+0xd2c> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
MOV -0xc0(%RBP),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV %RDX,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
SHR $0x2,%R8 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
LEA (%R10,%RCX,8),%R10 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
JMP 53a0bd <hypre_ParMatmul_RowSizes.extracted+0xd7d> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
MOV -0x58(%RBP),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV %RCX,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
SHR $0x3,%RSI | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
LEA (%R14,%R8,8),%R14 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
JMP 53a14d <hypre_ParMatmul_RowSizes.extracted+0xe0d> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
MOV -0xc0(%RBP),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV %RSI,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
SHR $0x2,%R8 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
LEA (%R10,%RCX,8),%R10 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
JMP 53a25d <hypre_ParMatmul_RowSizes.extracted+0xf1d> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |