Function: hypre_CSRMatrixMatvecT._omp_fn.3 | Module: libseq_mv.so | Source: csr_matvec.c:554-579 [...] | Coverage: 2.24% |
---|
Function: hypre_CSRMatrixMatvecT._omp_fn.3 | Module: libseq_mv.so | Source: csr_matvec.c:554-579 [...] | Coverage: 2.24% |
---|
/scratch_na/users/xoserete/qaas_runs/171-415-3872/intel/AMG/build/AMG/AMG/seq_mv/csr_matvec.c: 554 - 579 |
-------------------------------------------------------------------------------- |
554: #pragma omp parallel private(i,jj,j,my_thread_num,offset) |
555: #endif |
556: { |
557: my_thread_num = hypre_GetThreadNum(); |
558: offset = y_size*my_thread_num; |
[...] |
564: for (jj = A_i[i]; jj < A_i[i+1]; jj++) |
565: { |
566: j = A_j[jj]; |
567: y_data_expand[offset + j] += A_data[jj] * x_data[i]; |
[...] |
577: for (j = 0; j < num_threads; j++) |
578: { |
579: y_data[i] += y_data_expand[j*y_size + i]; |
0x5830 PUSH %RBP |
0x5831 MOV %RSP,%RBP |
0x5834 PUSH %R15 |
0x5836 PUSH %R14 |
0x5838 PUSH %R13 |
0x583a PUSH %R12 |
0x583c PUSH %RBX |
0x583d SUB $0x38,%RSP |
0x5841 MOV 0x28(%RDI),%RDX |
0x5845 MOV 0x10(%RDI),%RSI |
0x5849 MOV 0x20(%RDI),%R10 |
0x584d MOV 0x30(%RDI),%RAX |
0x5851 MOV 0x18(%RDI),%R15 |
0x5855 MOV 0x40(%RDI),%R13 |
0x5859 MOV %RDX,-0x60(%RBP) |
0x585d MOV 0x38(%RDI),%RBX |
0x5861 MOV 0x8(%RDI),%R14 |
0x5865 MOV %RSI,-0x50(%RBP) |
0x5869 MOV (%RDI),%RDI |
0x586c MOV %R10,-0x58(%RBP) |
0x5870 MOV %RAX,-0x40(%RBP) |
0x5874 MOV %RDI,-0x48(%RBP) |
0x5878 CALL 2110 <hypre_GetThreadNum@plt> |
0x587d MOV %RAX,%R12 |
0x5880 CALL 2050 <omp_get_num_threads@plt> |
0x5885 CLTQ |
0x5887 MOV %RAX,-0x38(%RBP) |
0x588b CALL 2100 <omp_get_thread_num@plt> |
0x5890 MOV -0x48(%RBP),%R11 |
0x5894 MOV -0x50(%RBP),%RSI |
0x5898 MOVSXD %EAX,%R9 |
0x589b MOV %R15,%RAX |
0x589e MOV -0x58(%RBP),%R8 |
0x58a2 CQTO |
0x58a4 IDIVQ -0x38(%RBP) |
0x58a8 CMP %RDX,%R9 |
0x58ab MOV %RAX,%RCX |
0x58ae JL 5cf0 |
0x58b4 MOV %RCX,%R10 |
0x58b7 IMUL %R9,%R10 |
0x58bb ADD %R10,%RDX |
0x58be ADD %RDX,%RCX |
0x58c1 CMP %RCX,%RDX |
0x58c4 JGE 5b4f |
0x58ca MOV -0x40(%RBP),%R15 |
0x58ce SAL $0x3,%RDX |
0x58d2 LEA (%R8,%RCX,8),%RDI |
0x58d6 ADD %RDX,%R14 |
0x58d9 ADD %R8,%RDX |
0x58dc IMUL %R15,%R12 |
(32) 0x58e0 MOV (%R14),%RAX |
(32) 0x58e3 MOV 0x8(%R14),%R8 |
(32) 0x58e7 CMP %R8,%RAX |
(32) 0x58ea JGE 5b3e |
(32) 0x58f0 MOV %R8,%RCX |
(32) 0x58f3 SUB %RAX,%RCX |
(32) 0x58f6 AND $0x7,%ECX |
(32) 0x58f9 JE 5a29 |
(32) 0x58ff CMP $0x1,%RCX |
(32) 0x5903 JE 59fd |
(32) 0x5909 CMP $0x2,%RCX |
(32) 0x590d JE 59dc |
(32) 0x5913 CMP $0x3,%RCX |
(32) 0x5917 JE 59b9 |
(32) 0x591d CMP $0x4,%RCX |
(32) 0x5921 JE 5996 |
(32) 0x5923 CMP $0x5,%RCX |
(32) 0x5927 JE 5975 |
(32) 0x5929 CMP $0x6,%RCX |
(32) 0x592d JE 5952 |
(32) 0x592f MOV (%RSI,%RAX,8),%R10 |
(32) 0x5933 VMOVSD (%R11,%RAX,8),%XMM0 |
(32) 0x5939 INC %RAX |
(32) 0x593c ADD %R12,%R10 |
(32) 0x593f LEA (%RBX,%R10,8),%R15 |
(32) 0x5943 VMOVSD (%R15),%XMM7 |
(32) 0x5948 VFMADD132SD (%RDX),%XMM7,%XMM0 |
(32) 0x594d VMOVSD %XMM0,(%R15) |
(32) 0x5952 MOV (%RSI,%RAX,8),%RCX |
(32) 0x5956 VMOVSD (%R11,%RAX,8),%XMM1 |
(32) 0x595c INC %RAX |
(32) 0x595f ADD %R12,%RCX |
(32) 0x5962 LEA (%RBX,%RCX,8),%R10 |
(32) 0x5966 VMOVSD (%R10),%XMM2 |
(32) 0x596b VFMADD132SD (%RDX),%XMM2,%XMM1 |
(32) 0x5970 VMOVSD %XMM1,(%R10) |
(32) 0x5975 MOV (%RSI,%RAX,8),%R15 |
(32) 0x5979 VMOVSD (%R11,%RAX,8),%XMM3 |
(32) 0x597f INC %RAX |
(32) 0x5982 ADD %R12,%R15 |
(32) 0x5985 LEA (%RBX,%R15,8),%RCX |
(32) 0x5989 VMOVSD (%RCX),%XMM6 |
(32) 0x598d VFMADD132SD (%RDX),%XMM6,%XMM3 |
(32) 0x5992 VMOVSD %XMM3,(%RCX) |
(32) 0x5996 MOV (%RSI,%RAX,8),%R10 |
(32) 0x599a VMOVSD (%R11,%RAX,8),%XMM4 |
(32) 0x59a0 INC %RAX |
(32) 0x59a3 ADD %R12,%R10 |
(32) 0x59a6 LEA (%RBX,%R10,8),%R15 |
(32) 0x59aa VMOVSD (%R15),%XMM5 |
(32) 0x59af VFMADD132SD (%RDX),%XMM5,%XMM4 |
(32) 0x59b4 VMOVSD %XMM4,(%R15) |
(32) 0x59b9 MOV (%RSI,%RAX,8),%RCX |
(32) 0x59bd VMOVSD (%R11,%RAX,8),%XMM8 |
(32) 0x59c3 INC %RAX |
(32) 0x59c6 ADD %R12,%RCX |
(32) 0x59c9 LEA (%RBX,%RCX,8),%R10 |
(32) 0x59cd VMOVSD (%R10),%XMM9 |
(32) 0x59d2 VFMADD132SD (%RDX),%XMM9,%XMM8 |
(32) 0x59d7 VMOVSD %XMM8,(%R10) |
(32) 0x59dc MOV (%RSI,%RAX,8),%R15 |
(32) 0x59e0 VMOVSD (%R11,%RAX,8),%XMM10 |
(32) 0x59e6 INC %RAX |
(32) 0x59e9 ADD %R12,%R15 |
(32) 0x59ec LEA (%RBX,%R15,8),%RCX |
(32) 0x59f0 VMOVSD (%RCX),%XMM11 |
(32) 0x59f4 VFMADD132SD (%RDX),%XMM11,%XMM10 |
(32) 0x59f9 VMOVSD %XMM10,(%RCX) |
(32) 0x59fd MOV (%RSI,%RAX,8),%R10 |
(32) 0x5a01 VMOVSD (%R11,%RAX,8),%XMM12 |
(32) 0x5a07 INC %RAX |
(32) 0x5a0a ADD %R12,%R10 |
(32) 0x5a0d LEA (%RBX,%R10,8),%R15 |
(32) 0x5a11 VMOVSD (%R15),%XMM13 |
(32) 0x5a16 VFMADD132SD (%RDX),%XMM13,%XMM12 |
(32) 0x5a1b VMOVSD %XMM12,(%R15) |
(32) 0x5a20 CMP %R8,%RAX |
(32) 0x5a23 JE 5b3e |
(33) 0x5a29 MOV (%RSI,%RAX,8),%RCX |
(33) 0x5a2d VMOVSD (%R11,%RAX,8),%XMM14 |
(33) 0x5a33 MOV 0x8(%RSI,%RAX,8),%R15 |
(33) 0x5a38 ADD %R12,%RCX |
(33) 0x5a3b LEA (%RBX,%RCX,8),%R10 |
(33) 0x5a3f ADD %R12,%R15 |
(33) 0x5a42 VMOVSD (%R10),%XMM15 |
(33) 0x5a47 LEA (%RBX,%R15,8),%RCX |
(33) 0x5a4b VFMADD132SD (%RDX),%XMM15,%XMM14 |
(33) 0x5a50 VMOVSD %XMM14,(%R10) |
(33) 0x5a55 MOV 0x10(%RSI,%RAX,8),%R10 |
(33) 0x5a5a VMOVSD 0x8(%R11,%RAX,8),%XMM0 |
(33) 0x5a61 VMOVSD (%RCX),%XMM7 |
(33) 0x5a65 ADD %R12,%R10 |
(33) 0x5a68 VFMADD132SD (%RDX),%XMM7,%XMM0 |
(33) 0x5a6d LEA (%RBX,%R10,8),%R15 |
(33) 0x5a71 VMOVSD %XMM0,(%RCX) |
(33) 0x5a75 MOV 0x18(%RSI,%RAX,8),%RCX |
(33) 0x5a7a VMOVSD 0x10(%R11,%RAX,8),%XMM1 |
(33) 0x5a81 VMOVSD (%R15),%XMM2 |
(33) 0x5a86 ADD %R12,%RCX |
(33) 0x5a89 VFMADD132SD (%RDX),%XMM2,%XMM1 |
(33) 0x5a8e LEA (%RBX,%RCX,8),%R10 |
(33) 0x5a92 VMOVSD %XMM1,(%R15) |
(33) 0x5a97 MOV 0x20(%RSI,%RAX,8),%R15 |
(33) 0x5a9c VMOVSD 0x18(%R11,%RAX,8),%XMM3 |
(33) 0x5aa3 VMOVSD (%R10),%XMM6 |
(33) 0x5aa8 ADD %R12,%R15 |
(33) 0x5aab VFMADD132SD (%RDX),%XMM6,%XMM3 |
(33) 0x5ab0 LEA (%RBX,%R15,8),%RCX |
(33) 0x5ab4 VMOVSD %XMM3,(%R10) |
(33) 0x5ab9 MOV 0x28(%RSI,%RAX,8),%R10 |
(33) 0x5abe VMOVSD 0x20(%R11,%RAX,8),%XMM4 |
(33) 0x5ac5 VMOVSD (%RCX),%XMM5 |
(33) 0x5ac9 ADD %R12,%R10 |
(33) 0x5acc VFMADD132SD (%RDX),%XMM5,%XMM4 |
(33) 0x5ad1 LEA (%RBX,%R10,8),%R15 |
(33) 0x5ad5 VMOVSD %XMM4,(%RCX) |
(33) 0x5ad9 MOV 0x30(%RSI,%RAX,8),%RCX |
(33) 0x5ade VMOVSD 0x28(%R11,%RAX,8),%XMM8 |
(33) 0x5ae5 VMOVSD (%R15),%XMM9 |
(33) 0x5aea ADD %R12,%RCX |
(33) 0x5aed VFMADD132SD (%RDX),%XMM9,%XMM8 |
(33) 0x5af2 LEA (%RBX,%RCX,8),%R10 |
(33) 0x5af6 VMOVSD %XMM8,(%R15) |
(33) 0x5afb VMOVSD 0x30(%R11,%RAX,8),%XMM10 |
(33) 0x5b02 VMOVSD (%R10),%XMM11 |
(33) 0x5b07 MOV 0x38(%RSI,%RAX,8),%R15 |
(33) 0x5b0c VFMADD132SD (%RDX),%XMM11,%XMM10 |
(33) 0x5b11 ADD %R12,%R15 |
(33) 0x5b14 LEA (%RBX,%R15,8),%RCX |
(33) 0x5b18 VMOVSD %XMM10,(%R10) |
(33) 0x5b1d VMOVSD 0x38(%R11,%RAX,8),%XMM12 |
(33) 0x5b24 VMOVSD (%RCX),%XMM13 |
(33) 0x5b28 ADD $0x8,%RAX |
(33) 0x5b2c VFMADD132SD (%RDX),%XMM13,%XMM12 |
(33) 0x5b31 VMOVSD %XMM12,(%RCX) |
(33) 0x5b35 CMP %R8,%RAX |
(33) 0x5b38 JNE 5a29 |
(32) 0x5b3e ADD $0x8,%RDX |
(32) 0x5b42 ADD $0x8,%R14 |
(32) 0x5b46 CMP %RDX,%RDI |
(32) 0x5b49 JNE 58e0 |
0x5b4f MOV %R9,-0x48(%RBP) |
0x5b53 CALL 2140 <GOMP_barrier@plt> |
0x5b58 MOV -0x40(%RBP),%RAX |
0x5b5c MOV -0x48(%RBP),%R14 |
0x5b60 CQTO |
0x5b62 IDIVQ -0x38(%RBP) |
0x5b66 CMP %RDX,%R14 |
0x5b69 JL 5ce6 |
0x5b6f IMUL %RAX,%R14 |
0x5b73 ADD %R14,%RDX |
0x5b76 ADD %RDX,%RAX |
0x5b79 CMP %RAX,%RDX |
0x5b7c JGE 5cd3 |
0x5b82 TEST %R13,%R13 |
0x5b85 JLE 5cd3 |
0x5b8b MOV -0x60(%RBP),%R11 |
0x5b8f LEA (,%RDX,8),%R12 |
0x5b97 ADD %R12,%R11 |
0x5b9a ADD %RBX,%R12 |
0x5b9d MOV -0x40(%RBP),%RBX |
0x5ba1 SAL $0x3,%RBX |
0x5ba5 NOPL (%RAX) |
(31) 0x5ba8 MOV %R13,%R9 |
(31) 0x5bab VMOVSD (%R11),%XMM14 |
(31) 0x5bb0 MOV %R12,%RDI |
(31) 0x5bb3 XOR %ESI,%ESI |
(31) 0x5bb5 AND $0x7,%R9D |
(31) 0x5bb9 JE 5c56 |
(31) 0x5bbf CMP $0x1,%R9 |
(31) 0x5bc3 JE 5c42 |
(31) 0x5bc5 CMP $0x2,%R9 |
(31) 0x5bc9 JE 5c33 |
(31) 0x5bcb CMP $0x3,%R9 |
(31) 0x5bcf JE 5c24 |
(31) 0x5bd1 CMP $0x4,%R9 |
(31) 0x5bd5 JE 5c15 |
(31) 0x5bd7 CMP $0x5,%R9 |
(31) 0x5bdb JE 5c06 |
(31) 0x5bdd CMP $0x6,%R9 |
(31) 0x5be1 JE 5bf7 |
(31) 0x5be3 VADDSD (%R12),%XMM14,%XMM14 |
(31) 0x5be9 MOV $0x1,%ESI |
(31) 0x5bee LEA (%R12,%RBX,1),%RDI |
(31) 0x5bf2 VMOVSD %XMM14,(%R11) |
(31) 0x5bf7 VADDSD (%RDI),%XMM14,%XMM14 |
(31) 0x5bfb INC %RSI |
(31) 0x5bfe ADD %RBX,%RDI |
(31) 0x5c01 VMOVSD %XMM14,(%R11) |
(31) 0x5c06 VADDSD (%RDI),%XMM14,%XMM14 |
(31) 0x5c0a INC %RSI |
(31) 0x5c0d ADD %RBX,%RDI |
(31) 0x5c10 VMOVSD %XMM14,(%R11) |
(31) 0x5c15 VADDSD (%RDI),%XMM14,%XMM14 |
(31) 0x5c19 INC %RSI |
(31) 0x5c1c ADD %RBX,%RDI |
(31) 0x5c1f VMOVSD %XMM14,(%R11) |
(31) 0x5c24 VADDSD (%RDI),%XMM14,%XMM14 |
(31) 0x5c28 INC %RSI |
(31) 0x5c2b ADD %RBX,%RDI |
(31) 0x5c2e VMOVSD %XMM14,(%R11) |
(31) 0x5c33 VADDSD (%RDI),%XMM14,%XMM14 |
(31) 0x5c37 INC %RSI |
(31) 0x5c3a ADD %RBX,%RDI |
(31) 0x5c3d VMOVSD %XMM14,(%R11) |
(31) 0x5c42 VADDSD (%RDI),%XMM14,%XMM14 |
(31) 0x5c46 INC %RSI |
(31) 0x5c49 ADD %RBX,%RDI |
(31) 0x5c4c VMOVSD %XMM14,(%R11) |
(31) 0x5c51 CMP %RSI,%R13 |
(31) 0x5c54 JE 5cbf |
(30) 0x5c56 VADDSD (%RDI),%XMM14,%XMM15 |
(30) 0x5c5a ADD %RBX,%RDI |
(30) 0x5c5d ADD $0x8,%RSI |
(30) 0x5c61 VMOVSD %XMM15,(%R11) |
(30) 0x5c66 VADDSD (%RDI),%XMM15,%XMM0 |
(30) 0x5c6a ADD %RBX,%RDI |
(30) 0x5c6d VMOVSD %XMM0,(%R11) |
(30) 0x5c72 VADDSD (%RDI),%XMM0,%XMM7 |
(30) 0x5c76 ADD %RBX,%RDI |
(30) 0x5c79 VMOVSD %XMM7,(%R11) |
(30) 0x5c7e VADDSD (%RDI),%XMM7,%XMM1 |
(30) 0x5c82 ADD %RBX,%RDI |
(30) 0x5c85 VMOVSD %XMM1,(%R11) |
(30) 0x5c8a VADDSD (%RDI),%XMM1,%XMM2 |
(30) 0x5c8e ADD %RBX,%RDI |
(30) 0x5c91 VMOVSD %XMM2,(%R11) |
(30) 0x5c96 VADDSD (%RDI),%XMM2,%XMM3 |
(30) 0x5c9a ADD %RBX,%RDI |
(30) 0x5c9d VMOVSD %XMM3,(%R11) |
(30) 0x5ca2 VADDSD (%RDI),%XMM3,%XMM6 |
(30) 0x5ca6 ADD %RBX,%RDI |
(30) 0x5ca9 VMOVSD %XMM6,(%R11) |
(30) 0x5cae VADDSD (%RDI),%XMM6,%XMM14 |
(30) 0x5cb2 ADD %RBX,%RDI |
(30) 0x5cb5 VMOVSD %XMM14,(%R11) |
(30) 0x5cba CMP %RSI,%R13 |
(30) 0x5cbd JNE 5c56 |
(31) 0x5cbf INC %RDX |
(31) 0x5cc2 ADD $0x8,%R11 |
(31) 0x5cc6 ADD $0x8,%R12 |
(31) 0x5cca CMP %RDX,%RAX |
(31) 0x5ccd JNE 5ba8 |
0x5cd3 ADD $0x38,%RSP |
0x5cd7 POP %RBX |
0x5cd8 POP %R12 |
0x5cda POP %R13 |
0x5cdc POP %R14 |
0x5cde POP %R15 |
0x5ce0 POP %RBP |
0x5ce1 JMP 2140 |
0x5ce6 INC %RAX |
0x5ce9 XOR %EDX,%EDX |
0x5ceb JMP 5b6f |
0x5cf0 INC %RCX |
0x5cf3 XOR %EDX,%EDX |
0x5cf5 JMP 58b4 |
0x5cfa NOPW (%RAX,%RAX,1) |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
○98.04 | gomp_thread_start | team.c:130 | libgomp.so.1.0.0 |
○1.96 | GOMP_parallel | libgomp.h:985 | libgomp.so.1.0.0 |
Path / |
Source file and lines | csr_matvec.c:554-579 |
Module | libseq_mv.so |
nb instructions | 87 |
nb uops | 99 |
loop length | 310 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 6 |
micro-operation queue | 16.50 cycles |
front end | 16.50 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 6.10 | 9.00 | 8.33 | 8.33 | 8.50 | 6.00 | 5.90 | 8.50 | 8.50 | 8.50 | 6.00 | 8.33 |
cycles | 6.10 | 12.43 | 8.33 | 8.33 | 8.50 | 6.00 | 5.90 | 8.50 | 8.50 | 8.50 | 6.00 | 8.33 |
Cycles executing div or sqrt instructions | 20.00 |
FE+BE cycles | 20.18-20.23 |
Stall cycles | 4.18-4.25 |
ROB full (events) | 3.47-0.00 |
PRF_INT full (events) | 1.72-4.40 |
Front-end | 16.50 |
Dispatch | 12.43 |
DIV/SQRT | 20.00 |
Overall L1 | 20.00 |
all | 0% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 0% |
all | 12% |
load | 12% |
store | 12% |
mul | 12% |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 12% |
other | 12% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
SUB $0x38,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x28(%RDI),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RDI),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%RDI),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x30(%RDI),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RDI),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x40(%RDI),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RDX,-0x60(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x38(%RDI),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RDI),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RSI,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RDI),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R10,-0x58(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RAX,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDI,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 2110 <hypre_GetThreadNum@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %RAX,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 2050 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
CLTQ | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
MOV %RAX,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 2100 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV -0x48(%RBP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x50(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVSXD %EAX,%R9 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV %R15,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV -0x58(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CQTO | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
IDIVQ -0x38(%RBP) | 5 | 0 | 3 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 11-16 | 10 |
CMP %RDX,%R9 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RAX,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
JL 5cf0 <hypre_CSRMatrixMatvecT._omp_fn.3+0x4c0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RCX,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
IMUL %R9,%R10 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %R10,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %RDX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %RCX,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 5b4f <hypre_CSRMatrixMatvecT._omp_fn.3+0x31f> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x40(%RBP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SAL $0x3,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
LEA (%R8,%RCX,8),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD %RDX,%R14 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %R8,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
IMUL %R15,%R12 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %R9,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 2140 <GOMP_barrier@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV -0x40(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x48(%RBP),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CQTO | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
IDIVQ -0x38(%RBP) | 5 | 0 | 3 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 11-16 | 10 |
CMP %RDX,%R14 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JL 5ce6 <hypre_CSRMatrixMatvecT._omp_fn.3+0x4b6> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %RAX,%R14 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %R14,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %RDX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %RAX,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 5cd3 <hypre_CSRMatrixMatvecT._omp_fn.3+0x4a3> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
TEST %R13,%R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 5cd3 <hypre_CSRMatrixMatvecT._omp_fn.3+0x4a3> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x60(%RBP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (,%RDX,8),%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD %R12,%R11 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %RBX,%R12 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x40(%RBP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SAL $0x3,%RBX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD $0x38,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
JMP 2140 <GOMP_barrier@plt> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
INC %RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 5b6f <hypre_CSRMatrixMatvecT._omp_fn.3+0x33f> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
INC %RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 58b4 <hypre_CSRMatrixMatvecT._omp_fn.3+0x84> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | csr_matvec.c:554-579 |
Module | libseq_mv.so |
nb instructions | 87 |
nb uops | 99 |
loop length | 310 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 6 |
micro-operation queue | 16.50 cycles |
front end | 16.50 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 6.10 | 9.00 | 8.33 | 8.33 | 8.50 | 6.00 | 5.90 | 8.50 | 8.50 | 8.50 | 6.00 | 8.33 |
cycles | 6.10 | 12.43 | 8.33 | 8.33 | 8.50 | 6.00 | 5.90 | 8.50 | 8.50 | 8.50 | 6.00 | 8.33 |
Cycles executing div or sqrt instructions | 20.00 |
FE+BE cycles | 20.18-20.23 |
Stall cycles | 4.18-4.25 |
ROB full (events) | 3.47-0.00 |
PRF_INT full (events) | 1.72-4.40 |
Front-end | 16.50 |
Dispatch | 12.43 |
DIV/SQRT | 20.00 |
Overall L1 | 20.00 |
all | 0% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 0% |
all | 12% |
load | 12% |
store | 12% |
mul | 12% |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 12% |
other | 12% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
SUB $0x38,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x28(%RDI),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RDI),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%RDI),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x30(%RDI),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RDI),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x40(%RDI),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RDX,-0x60(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x38(%RDI),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RDI),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RSI,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RDI),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R10,-0x58(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RAX,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDI,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 2110 <hypre_GetThreadNum@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %RAX,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 2050 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
CLTQ | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
MOV %RAX,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 2100 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV -0x48(%RBP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x50(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVSXD %EAX,%R9 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV %R15,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV -0x58(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CQTO | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
IDIVQ -0x38(%RBP) | 5 | 0 | 3 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 11-16 | 10 |
CMP %RDX,%R9 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RAX,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
JL 5cf0 <hypre_CSRMatrixMatvecT._omp_fn.3+0x4c0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RCX,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
IMUL %R9,%R10 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %R10,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %RDX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %RCX,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 5b4f <hypre_CSRMatrixMatvecT._omp_fn.3+0x31f> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x40(%RBP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SAL $0x3,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
LEA (%R8,%RCX,8),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD %RDX,%R14 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %R8,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
IMUL %R15,%R12 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %R9,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 2140 <GOMP_barrier@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV -0x40(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x48(%RBP),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CQTO | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
IDIVQ -0x38(%RBP) | 5 | 0 | 3 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 11-16 | 10 |
CMP %RDX,%R14 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JL 5ce6 <hypre_CSRMatrixMatvecT._omp_fn.3+0x4b6> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %RAX,%R14 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %R14,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %RDX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %RAX,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 5cd3 <hypre_CSRMatrixMatvecT._omp_fn.3+0x4a3> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
TEST %R13,%R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 5cd3 <hypre_CSRMatrixMatvecT._omp_fn.3+0x4a3> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x60(%RBP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (,%RDX,8),%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD %R12,%R11 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %RBX,%R12 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x40(%RBP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SAL $0x3,%RBX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD $0x38,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
JMP 2140 <GOMP_barrier@plt> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
INC %RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 5b6f <hypre_CSRMatrixMatvecT._omp_fn.3+0x33f> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
INC %RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 58b4 <hypre_CSRMatrixMatvecT._omp_fn.3+0x84> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼hypre_CSRMatrixMatvecT._omp_fn.3– | 2.24 | 0.41 |
▼Loop 32 - csr_matvec.c:564-567 - libseq_mv.so– | 2.14 | 0.31 |
○Loop 33 - csr_matvec.c:564-567 - libseq_mv.so | 0.03 | 0 |
▼Loop 31 - csr_matvec.c:577-579 - libseq_mv.so– | 0.01 | 0 |
○Loop 30 - csr_matvec.c:577-579 - libseq_mv.so | 0.07 | 0.01 |