Function: hypre_CSRMatrixMatvecOutOfPlace.extracted.19 | Module: libseq_mv.so | Source: csr_matvec.c:178-204 [...] | Coverage: 0.09% |
---|
Function: hypre_CSRMatrixMatvecOutOfPlace.extracted.19 | Module: libseq_mv.so | Source: csr_matvec.c:178-204 [...] | Coverage: 0.09% |
---|
/home/eoseret/qaas_runs_CPU_9468/172-019-1763/intel/AMG/build/AMG/AMG/seq_mv/csr_matvec.c: 178 - 204 |
-------------------------------------------------------------------------------- |
178: #pragma omp parallel for private(i,j,jj,m,tempx) HYPRE_SMP_SCHEDULE |
179: #endif |
180: |
181: for (i = 0; i < num_rownnz; i++) |
182: { |
183: m = A_rownnz[i]; |
[...] |
191: if ( num_vectors==1 ) |
192: { |
193: tempx = 0; |
194: for (jj = A_i[m]; jj < A_i[m+1]; jj++) |
195: tempx += A_data[jj] * x_data[A_j[jj]]; |
196: y_data[m] += tempx; |
197: } |
198: else |
199: for ( j=0; j<num_vectors; ++j ) |
200: { |
201: tempx = 0; |
202: for (jj = A_i[m]; jj < A_i[m+1]; jj++) |
203: tempx += A_data[jj] * x_data[ j*vecstride_x + A_j[jj]*idxstride_x ]; |
204: y_data[ j*vecstride_y + m*idxstride_y] += tempx; |
0x8c10 PUSH %RBP |
0x8c11 MOV %RSP,%RBP |
0x8c14 PUSH %R15 |
0x8c16 PUSH %R14 |
0x8c18 PUSH %R13 |
0x8c1a PUSH %R12 |
0x8c1c PUSH %RBX |
0x8c1d SUB $0x88,%RSP |
0x8c24 MOV %R9,-0x70(%RBP) |
0x8c28 MOV %R8,%R14 |
0x8c2b MOV %RCX,-0x68(%RBP) |
0x8c2f MOV %RDX,%R12 |
0x8c32 MOV 0x40(%RBP),%RAX |
0x8c36 MOV %RAX,-0x38(%RBP) |
0x8c3a MOV 0x38(%RBP),%R13 |
0x8c3e MOV 0x30(%RBP),%RAX |
0x8c42 MOV %RAX,-0xa8(%RBP) |
0x8c49 MOV 0x28(%RBP),%RAX |
0x8c4d MOV %RAX,-0x88(%RBP) |
0x8c54 MOV 0x20(%RBP),%R15 |
0x8c58 MOV 0x18(%RBP),%RAX |
0x8c5c MOV %RAX,-0x78(%RBP) |
0x8c60 MOV 0x10(%RBP),%RBX |
0x8c64 MOVL $0,-0x50(%RBP) |
0x8c6b MOV (%RDI),%ESI |
0x8c6d MOVQ $0,-0x80(%RBP) |
0x8c75 MOVQ $0x1,-0xb0(%RBP) |
0x8c80 SUB $0x8,%RSP |
0x8c84 LEA -0xb0(%RBP),%RAX |
0x8c8b LEA 0x117ee(%RIP),%RDI |
0x8c92 LEA -0x50(%RBP),%RCX |
0x8c96 LEA -0x80(%RBP),%R8 |
0x8c9a LEA 0x50(%RBP),%R9 |
0x8c9e MOV %ESI,-0x4c(%RBP) |
0x8ca1 MOV $0x22,%EDX |
0x8ca6 PUSH $0x1 |
0x8ca8 PUSH $0x1 |
0x8caa PUSH %RAX |
0x8cab CALL 40c0 <__kmpc_for_static_init_8@plt> |
0x8cb0 ADD $0x20,%RSP |
0x8cb4 MOV -0x80(%RBP),%RAX |
0x8cb8 MOV 0x50(%RBP),%RCX |
0x8cbc MOV %RAX,-0x60(%RBP) |
0x8cc0 SUB %RAX,%RCX |
0x8cc3 MOV %RCX,-0x58(%RBP) |
0x8cc7 JAE 8ce9 |
0x8cc9 LEA 0x117d0(%RIP),%RDI |
0x8cd0 MOV -0x4c(%RBP),%ESI |
0x8cd3 ADD $0x88,%RSP |
0x8cda POP %RBX |
0x8cdb POP %R12 |
0x8cdd POP %R13 |
0x8cdf POP %R14 |
0x8ce1 POP %R15 |
0x8ce3 POP %RBP |
0x8ce4 JMP 4030 |
0x8ce9 CMP $0x1,%R15 |
0x8ced JNE 8ef2 |
0x8cf3 XOR %EDX,%EDX |
0x8cf5 LEA 0xb48c(%RIP),%RSI |
0x8cfc JMP 8db4 |
0x8d01 ADD %RAX,%R8 |
0x8d04 MOV 0x30(%R14,%R8,8),%RAX |
0x8d09 MOVSD (%RBX,%RAX,8),%XMM1 |
0x8d0e MULSD 0x30(%R12,%R8,8),%XMM1 |
0x8d15 ADDSD %XMM1,%XMM0 |
0x8d19 MOV 0x28(%R14,%R8,8),%RAX |
0x8d1e MOVSD (%RBX,%RAX,8),%XMM1 |
0x8d23 MULSD 0x28(%R12,%R8,8),%XMM1 |
0x8d2a ADDSD %XMM1,%XMM0 |
0x8d2e MOV 0x20(%R14,%R8,8),%RAX |
0x8d33 MOVSD (%RBX,%RAX,8),%XMM1 |
0x8d38 MULSD 0x20(%R12,%R8,8),%XMM1 |
0x8d3f ADDSD %XMM1,%XMM0 |
0x8d43 MOV 0x18(%R14,%R8,8),%RAX |
0x8d48 MOVSD (%RBX,%RAX,8),%XMM1 |
0x8d4d MULSD 0x18(%R12,%R8,8),%XMM1 |
0x8d54 ADDSD %XMM1,%XMM0 |
0x8d58 MOV 0x10(%R14,%R8,8),%RAX |
0x8d5d MOVSD (%RBX,%RAX,8),%XMM1 |
0x8d62 MULSD 0x10(%R12,%R8,8),%XMM1 |
0x8d69 ADDSD %XMM1,%XMM0 |
0x8d6d MOV 0x8(%R14,%R8,8),%RAX |
0x8d72 MOVSD (%RBX,%RAX,8),%XMM1 |
0x8d77 MULSD 0x8(%R12,%R8,8),%XMM1 |
0x8d7e ADDSD %XMM1,%XMM0 |
0x8d82 MOV (%R14,%R8,8),%RAX |
0x8d86 MOVSD (%RBX,%RAX,8),%XMM1 |
0x8d8b MULSD (%R12,%R8,8),%XMM1 |
0x8d91 ADDSD %XMM1,%XMM0 |
(96) 0x8d95 MOV -0x78(%RBP),%RAX |
(96) 0x8d99 ADDSD (%RAX,%RDI,8),%XMM0 |
(96) 0x8d9e MOVSD %XMM0,(%RAX,%RDI,8) |
(96) 0x8da3 LEA 0x1(%RDX),%RAX |
(96) 0x8da7 CMP -0x58(%RBP),%RDX |
(96) 0x8dab MOV %RAX,%RDX |
(96) 0x8dae JE 8cc9 |
(96) 0x8db4 MOV -0x60(%RBP),%RAX |
(96) 0x8db8 ADD %RDX,%RAX |
(96) 0x8dbb MOV -0x70(%RBP),%RCX |
(96) 0x8dbf MOV (%RCX,%RAX,8),%RDI |
(96) 0x8dc3 MOV -0x68(%RBP),%RAX |
(96) 0x8dc7 MOV (%RAX,%RDI,8),%R8 |
(96) 0x8dcb MOV 0x8(%RAX,%RDI,8),%RAX |
(96) 0x8dd0 XORPD %XMM0,%XMM0 |
(96) 0x8dd4 SUB %R8,%RAX |
(96) 0x8dd7 JLE 8d95 |
(96) 0x8dd9 CMP $0x8,%RAX |
(96) 0x8ddd JB 8ea3 |
(96) 0x8de3 MOV %RAX,%R9 |
(96) 0x8de6 SHR $0x3,%R9 |
(96) 0x8dea LEA (,%R8,8),%R10 |
(96) 0x8df2 NOPW %CS:(%RAX,%RAX,1) |
(97) 0x8e00 MOV 0x10(%R14,%R10,1),%RCX |
(97) 0x8e05 MOVSD (%RBX,%RCX,8),%XMM1 |
(97) 0x8e0a MOV 0x18(%R14,%R10,1),%RCX |
(97) 0x8e0f MOVHPD (%RBX,%RCX,8),%XMM1 |
(97) 0x8e14 MOV 0x30(%R14,%R10,1),%RCX |
(97) 0x8e19 MOVSD (%RBX,%RCX,8),%XMM2 |
(97) 0x8e1e MOV 0x38(%R14,%R10,1),%RCX |
(97) 0x8e23 MOVHPD (%RBX,%RCX,8),%XMM2 |
(97) 0x8e28 MOV (%R14,%R10,1),%RCX |
(97) 0x8e2c MOVSD (%RBX,%RCX,8),%XMM3 |
(97) 0x8e31 MOV 0x8(%R14,%R10,1),%RCX |
(97) 0x8e36 MOVHPD (%RBX,%RCX,8),%XMM3 |
(97) 0x8e3b MOV 0x20(%R14,%R10,1),%RCX |
(97) 0x8e40 MOVSD (%RBX,%RCX,8),%XMM4 |
(97) 0x8e45 MOV 0x28(%R14,%R10,1),%RCX |
(97) 0x8e4a MOVHPD (%RBX,%RCX,8),%XMM4 |
(97) 0x8e4f MOVUPD 0x10(%R12,%R10,1),%XMM5 |
(97) 0x8e56 MULPD %XMM5,%XMM1 |
(97) 0x8e5a MOVUPD 0x30(%R12,%R10,1),%XMM5 |
(97) 0x8e61 MULPD %XMM5,%XMM2 |
(97) 0x8e65 MOVUPD (%R12,%R10,1),%XMM5 |
(97) 0x8e6b ADDPD %XMM1,%XMM2 |
(97) 0x8e6f MOVUPD 0x20(%R12,%R10,1),%XMM1 |
(97) 0x8e76 MULPD %XMM5,%XMM3 |
(97) 0x8e7a MULPD %XMM1,%XMM4 |
(97) 0x8e7e ADDPD %XMM3,%XMM4 |
(97) 0x8e82 ADDPD %XMM2,%XMM4 |
(97) 0x8e86 MOVAPD %XMM4,%XMM1 |
(97) 0x8e8a UNPCKHPD %XMM4,%XMM1 |
(97) 0x8e8e ADDSD %XMM4,%XMM1 |
(97) 0x8e92 ADDSD %XMM1,%XMM0 |
(97) 0x8e96 ADD $0x40,%R10 |
(97) 0x8e9a DEC %R9 |
(97) 0x8e9d JNE 8e00 |
(96) 0x8ea3 MOV %EAX,%ECX |
(96) 0x8ea5 AND $0x7,%ECX |
(96) 0x8ea8 DEC %RCX |
(96) 0x8eab CMP $0x6,%RCX |
(96) 0x8eaf JA 8d95 |
0x8eb5 AND $-0x8,%RAX |
0x8eb9 MOVSXD (%RSI,%RCX,4),%RCX |
0x8ebd ADD %RSI,%RCX |
0x8ec0 JMP %RCX |
0x8ec2 ADD %RAX,%R8 |
0x8ec5 JMP 8d82 |
0x8eca ADD %RAX,%R8 |
0x8ecd JMP 8d6d |
0x8ed2 ADD %RAX,%R8 |
0x8ed5 JMP 8d58 |
0x8eda ADD %RAX,%R8 |
0x8edd JMP 8d43 |
0x8ee2 ADD %RAX,%R8 |
0x8ee5 JMP 8d2e |
0x8eea ADD %RAX,%R8 |
0x8eed JMP 8d19 |
0x8ef2 JL 8cc9 |
0x8ef8 XOR %ECX,%ECX |
0x8efa DEC %R15 |
0x8efd JMP 8f18 |
0x8eff NOP |
(98) 0x8f00 MOV -0x90(%RBP),%RCX |
(98) 0x8f07 LEA 0x1(%RCX),%RAX |
(98) 0x8f0b CMP -0x58(%RBP),%RCX |
(98) 0x8f0f MOV %RAX,%RCX |
(98) 0x8f12 JE 8cc9 |
(98) 0x8f18 MOV -0x60(%RBP),%RAX |
(98) 0x8f1c MOV %RCX,-0x90(%RBP) |
(98) 0x8f23 ADD %RCX,%RAX |
(98) 0x8f26 MOV -0x70(%RBP),%RCX |
(98) 0x8f2a MOV (%RCX,%RAX,8),%RCX |
(98) 0x8f2e MOV -0x68(%RBP),%RAX |
(98) 0x8f32 MOV (%RAX,%RCX,8),%RDX |
(98) 0x8f36 MOV %RCX,-0x48(%RBP) |
(98) 0x8f3a MOV 0x8(%RAX,%RCX,8),%R8 |
(98) 0x8f3f MOV %RDX,%RAX |
(98) 0x8f42 MOV %RDX,-0x30(%RBP) |
(98) 0x8f46 SUB %RDX,%R8 |
(98) 0x8f49 JLE 8f00 |
(98) 0x8f4b MOV -0x30(%RBP),%RAX |
(98) 0x8f4f LEA (,%RAX,8),%RAX |
(98) 0x8f57 MOV %RAX,-0xa0(%RBP) |
(98) 0x8f5e MOV %R8,%RAX |
(98) 0x8f61 SHR $0x2,%RAX |
(98) 0x8f65 MOV %RAX,-0x98(%RBP) |
(98) 0x8f6c MOV %R8,%RAX |
(98) 0x8f6f AND $-0x4,%RAX |
(98) 0x8f73 MOV %RAX,-0x40(%RBP) |
(98) 0x8f77 MOV -0x48(%RBP),%RAX |
(98) 0x8f7b IMUL -0x88(%RBP),%RAX |
(98) 0x8f83 MOV %RAX,-0x48(%RBP) |
(98) 0x8f87 XOR %ECX,%ECX |
(98) 0x8f89 JMP 9025 |
0x8f8e XCHG %AX,%AX |
(99) 0x8f90 MOV -0x30(%RBP),%RAX |
(99) 0x8f94 MOV -0x40(%RBP),%RDX |
(99) 0x8f98 ADD %RDX,%RAX |
(99) 0x8f9b MOV 0x10(%R14,%RAX,8),%RDX |
(99) 0x8fa0 IMUL %R13,%RDX |
(99) 0x8fa4 MOV %RCX,%R9 |
(99) 0x8fa7 IMUL -0x38(%RBP),%R9 |
(99) 0x8fac ADD %R9,%RDX |
(99) 0x8faf MOVSD (%RBX,%RDX,8),%XMM1 |
(99) 0x8fb4 MULSD 0x10(%R12,%RAX,8),%XMM1 |
(99) 0x8fbb ADDSD %XMM1,%XMM0 |
(99) 0x8fbf MOV %RSI,%R15 |
(99) 0x8fc2 MOV 0x8(%R14,%RAX,8),%RDX |
(99) 0x8fc7 IMUL %R13,%RDX |
(99) 0x8fcb ADD %R9,%RDX |
(99) 0x8fce MOVSD (%RBX,%RDX,8),%XMM1 |
(99) 0x8fd3 MULSD 0x8(%R12,%RAX,8),%XMM1 |
(99) 0x8fda ADDSD %XMM1,%XMM0 |
(99) 0x8fde MOV (%R14,%RAX,8),%RDX |
(99) 0x8fe2 IMUL %R13,%RDX |
(99) 0x8fe6 ADD %R9,%RDX |
(99) 0x8fe9 MOVSD (%RBX,%RDX,8),%XMM1 |
(99) 0x8fee MULSD (%R12,%RAX,8),%XMM1 |
(99) 0x8ff4 ADDSD %XMM1,%XMM0 |
(99) 0x8ff8 MOV %RCX,%RAX |
(99) 0x8ffb IMUL -0xa8(%RBP),%RAX |
(99) 0x9003 ADD -0x48(%RBP),%RAX |
(99) 0x9007 MOV -0x78(%RBP),%RDX |
(99) 0x900b ADDSD (%RDX,%RAX,8),%XMM0 |
(99) 0x9010 MOVSD %XMM0,(%RDX,%RAX,8) |
(99) 0x9015 LEA 0x1(%RCX),%RAX |
(99) 0x9019 CMP %R15,%RCX |
(99) 0x901c MOV %RAX,%RCX |
(99) 0x901f JE 8f00 |
(99) 0x9025 MOV %R15,%RSI |
(99) 0x9028 XORPD %XMM0,%XMM0 |
(99) 0x902c CMP $0x4,%R8 |
(99) 0x9030 JB 90c7 |
(99) 0x9036 MOV %RCX,%RAX |
(99) 0x9039 IMUL -0x38(%RBP),%RAX |
(99) 0x903e MOV -0x98(%RBP),%R10 |
(99) 0x9045 MOV -0xa0(%RBP),%R9 |
(99) 0x904c NOPL (%RAX) |
(100) 0x9050 MOV (%R14,%R9,1),%RDX |
(100) 0x9054 IMUL %R13,%RDX |
(100) 0x9058 ADD %RAX,%RDX |
(100) 0x905b MOV 0x8(%R14,%R9,1),%R11 |
(100) 0x9060 IMUL %R13,%R11 |
(100) 0x9064 ADD %RAX,%R11 |
(100) 0x9067 MOV 0x10(%R14,%R9,1),%RDI |
(100) 0x906c IMUL %R13,%RDI |
(100) 0x9070 ADD %RAX,%RDI |
(100) 0x9073 MOV 0x18(%R14,%R9,1),%R15 |
(100) 0x9078 IMUL %R13,%R15 |
(100) 0x907c ADD %RAX,%R15 |
(100) 0x907f MOVUPD (%R12,%R9,1),%XMM1 |
(100) 0x9085 MOVUPD 0x10(%R12,%R9,1),%XMM2 |
(100) 0x908c MOVSD (%RBX,%RDX,8),%XMM3 |
(100) 0x9091 MOVHPD (%RBX,%R11,8),%XMM3 |
(100) 0x9097 MULPD %XMM1,%XMM3 |
(100) 0x909b MOVSD (%RBX,%RDI,8),%XMM1 |
(100) 0x90a0 MOVHPD (%RBX,%R15,8),%XMM1 |
(100) 0x90a6 MULPD %XMM2,%XMM1 |
(100) 0x90aa ADDPD %XMM3,%XMM1 |
(100) 0x90ae MOVAPD %XMM1,%XMM2 |
(100) 0x90b2 UNPCKHPD %XMM1,%XMM2 |
(100) 0x90b6 ADDSD %XMM1,%XMM2 |
(100) 0x90ba ADDSD %XMM2,%XMM0 |
(100) 0x90be ADD $0x20,%R9 |
(100) 0x90c2 DEC %R10 |
(100) 0x90c5 JNE 9050 |
(99) 0x90c7 MOV %R8D,%EAX |
(99) 0x90ca AND $0x3,%EAX |
(99) 0x90cd CMP $0x1,%RAX |
(99) 0x90d1 JE 9100 |
(99) 0x90d3 CMP $0x3,%EAX |
(99) 0x90d6 JE 8f90 |
(99) 0x90dc CMP $0x2,%EAX |
(99) 0x90df MOV %RSI,%R15 |
(99) 0x90e2 JNE 8ff8 |
(99) 0x90e8 MOV -0x30(%RBP),%RAX |
(99) 0x90ec MOV -0x40(%RBP),%RDX |
(99) 0x90f0 ADD %RDX,%RAX |
(99) 0x90f3 MOV %RCX,%R9 |
(99) 0x90f6 IMUL -0x38(%RBP),%R9 |
(99) 0x90fb JMP 8fc2 |
(99) 0x9100 MOV -0x30(%RBP),%RAX |
(99) 0x9104 MOV -0x40(%RBP),%RDX |
(99) 0x9108 ADD %RDX,%RAX |
(99) 0x910b MOV %RCX,%R9 |
(99) 0x910e IMUL -0x38(%RBP),%R9 |
(99) 0x9113 MOV %RSI,%R15 |
(99) 0x9116 JMP 8fde |
0x911b NOPL (%RAX,%RAX,1) |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►61.90+ | __kmp_invoke_microtask | libiomp5.so | |
○ | __kmp_fork_call | libiomp5.so | |
○ | __kmpc_fork_call | libiomp5.so | |
○ | hypre_CSRMatrixMatvecOutOfPlac[...] | csr_matop.c:645 | libseq_mv.so |
○ | hypre_CSRMatrixMatvecOutOfPlac[...] | csr_matvec.c:166 | libseq_mv.so |
○ | hypre_ParCSRMatrixMatvecOutOfP[...] | HYPRE_parcsr_matrix.c:361 | libparcsr_mv.so |
○ | hypre_ParCSRMatrixMatvecOutOfP[...] | par_csr_matvec.c:178 | libparcsr_mv.so |
○ | hypre_BoomerAMGCycle.A | ams.c:3855 | libparcsr_ls.so |
○ | hypre_BoomerAMGSolve.A | ams.c:3855 | libparcsr_ls.so |
○ | hypre_BoomerAMGSolve | ams.c:3550 | libparcsr_ls.so |
○ | hypre_PCGSolve.A | gmres.c:1245 | libkrylov.so |
○ | hypre_PCGSolve | pcg.c:479 | libkrylov.so |
○ | main.A | amg.c:419 | exec |
○ | __libc_start_call_main | libc.so.6 | |
►17.53+ | __kmp_invoke_microtask | libiomp5.so | |
○ | __kmp_fork_call | libiomp5.so | |
○ | __kmpc_fork_call | libiomp5.so | |
○ | hypre_CSRMatrixMatvecOutOfPlac[...] | csr_matop.c:645 | libseq_mv.so |
○ | hypre_CSRMatrixMatvecOutOfPlac[...] | csr_matvec.c:166 | libseq_mv.so |
○ | hypre_ParCSRMatrixMatvecOutOfP[...] | HYPRE_parcsr_matrix.c:361 | libparcsr_mv.so |
○ | hypre_ParCSRMatrixMatvecOutOfP[...] | par_csr_matvec.c:178 | libparcsr_mv.so |
○ | hypre_PCGSolve.A | gmres.c:1245 | libkrylov.so |
○ | hypre_PCGSolve | pcg.c:479 | libkrylov.so |
○ | main.A | amg.c:419 | exec |
○ | __libc_start_call_main | libc.so.6 | |
►15.15+ | __kmp_invoke_microtask | libiomp5.so | |
○ | __kmp_fork_call | libiomp5.so | |
○ | __kmpc_fork_call | libiomp5.so | |
○ | hypre_CSRMatrixMatvecOutOfPlac[...] | csr_matop.c:645 | libseq_mv.so |
○ | hypre_CSRMatrixMatvecOutOfPlac[...] | csr_matvec.c:166 | libseq_mv.so |
○ | hypre_ParCSRMatrixMatvecOutOfP[...] | HYPRE_parcsr_matrix.c:361 | libparcsr_mv.so |
○ | hypre_ParCSRMatrixMatvecOutOfP[...] | par_csr_matvec.c:178 | libparcsr_mv.so |
○ | hypre_BoomerAMGCycle.A | ams.c:3855 | libparcsr_ls.so |
○ | hypre_BoomerAMGSolve.A | ams.c:3855 | libparcsr_ls.so |
○ | hypre_BoomerAMGSolve | ams.c:3550 | libparcsr_ls.so |
○ | hypre_PCGSolve.A | gmres.c:1245 | libkrylov.so |
○ | hypre_PCGSolve | pcg.c:479 | libkrylov.so |
○ | main.A | amg.c:419 | exec |
○ | __libc_start_call_main | libc.so.6 | |
►2.81+ | __kmp_invoke_microtask | libiomp5.so | |
○ | __kmp_fork_call | libiomp5.so | |
○ | __kmpc_fork_call | libiomp5.so | |
○ | hypre_CSRMatrixMatvecOutOfPlac[...] | csr_matop.c:645 | libseq_mv.so |
○ | hypre_CSRMatrixMatvecOutOfPlac[...] | csr_matvec.c:166 | libseq_mv.so |
○ | hypre_ParCSRMatrixMatvecOutOfP[...] | HYPRE_parcsr_matrix.c:361 | libparcsr_mv.so |
○ | hypre_ParCSRMatrixMatvecOutOfP[...] | par_csr_matvec.c:178 | libparcsr_mv.so |
○ | hypre_BoomerAMGCycle.A | ams.c:3855 | libparcsr_ls.so |
○ | hypre_BoomerAMGSolve.A | ams.c:3855 | libparcsr_ls.so |
○ | hypre_BoomerAMGSolve | ams.c:3550 | libparcsr_ls.so |
○ | hypre_PCGSolve.A | gmres.c:1245 | libkrylov.so |
○ | hypre_PCGSolve | pcg.c:479 | libkrylov.so |
○ | main.A | amg.c:419 | exec |
○ | __libc_start_call_main | libc.so.6 | |
►1.30+ | __kmp_invoke_microtask | libiomp5.so | |
○ | __kmp_fork_call | libiomp5.so | |
○ | __kmpc_fork_call | libiomp5.so | |
○ | hypre_CSRMatrixMatvecOutOfPlac[...] | csr_matop.c:645 | libseq_mv.so |
○ | hypre_CSRMatrixMatvecOutOfPlac[...] | csr_matvec.c:166 | libseq_mv.so |
○ | hypre_ParCSRMatrixMatvecOutOfP[...] | HYPRE_parcsr_matrix.c:361 | libparcsr_mv.so |
○ | hypre_ParCSRMatrixMatvecOutOfP[...] | par_csr_matvec.c:178 | libparcsr_mv.so |
○ | hypre_BoomerAMGCycle.A | ams.c:3855 | libparcsr_ls.so |
○ | hypre_BoomerAMGSolve.A | ams.c:3855 | libparcsr_ls.so |
○ | hypre_BoomerAMGSolve | ams.c:3550 | libparcsr_ls.so |
○ | hypre_PCGSolve.A | gmres.c:1245 | libkrylov.so |
○ | hypre_PCGSolve | pcg.c:479 | libkrylov.so |
○ | main.A | amg.c:419 | exec |
○ | __libc_start_call_main | libc.so.6 | |
►1.30+ | __kmp_invoke_microtask | libiomp5.so | |
○ | __kmp_fork_call | libiomp5.so | |
○ | __kmpc_fork_call | libiomp5.so | |
○ | hypre_CSRMatrixMatvecOutOfPlac[...] | csr_matop.c:645 | libseq_mv.so |
○ | hypre_CSRMatrixMatvecOutOfPlac[...] | csr_matvec.c:166 | libseq_mv.so |
○ | hypre_ParCSRMatrixMatvecOutOfP[...] | HYPRE_parcsr_matrix.c:361 | libparcsr_mv.so |
○ | hypre_ParCSRMatrixMatvecOutOfP[...] | par_csr_matvec.c:178 | libparcsr_mv.so |
○ | hypre_PCGSolve.A | gmres.c:1245 | libkrylov.so |
○ | hypre_PCGSolve | pcg.c:479 | libkrylov.so |
○ | main.A | amg.c:419 | exec |
○ | __libc_start_call_main | libc.so.6 |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►100.00+ | __kmp_invoke_microtask | libiomp5.so | |
○ | __kmp_invoke_task_func | libiomp5.so |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►100.00+ | __kmp_invoke_microtask | libiomp5.so | |
○ | __kmp_invoke_task_func | libiomp5.so |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►100.00+ | __kmp_invoke_microtask | libiomp5.so | |
○ | __kmp_invoke_task_func | libiomp5.so |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►100.00+ | __kmp_invoke_microtask | libiomp5.so | |
○ | __kmp_invoke_task_func | libiomp5.so |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►100.00+ | __kmp_invoke_microtask | libiomp5.so | |
○ | __kmp_invoke_task_func | libiomp5.so |
Path / |
Source file and lines | csr_matvec.c:178-204 |
Module | libseq_mv.so |
nb instructions | 113 |
nb uops | 111 |
loop length | 471 |
used x86 registers | 14 |
used mmx registers | 0 |
used xmm registers | 2 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 20 |
ADD-SUB / MUL ratio | 1.00 |
micro-operation queue | 18.50 cycles |
front end | 18.50 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 8.00 | 8.00 | 8.00 | 8.00 | 7.00 | 15.33 | 15.33 | 15.33 | 3.50 | 3.50 | 3.50 | 3.50 | 0.00 | 0.00 |
cycles | 8.00 | 8.00 | 8.00 | 8.00 | 7.00 | 15.33 | 15.33 | 15.33 | 3.50 | 3.50 | 3.50 | 3.50 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | NA |
Front-end | 18.50 |
Dispatch | 15.33 |
Overall L1 | 18.50 |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 0% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 10% |
load | 12% |
store | 10% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 8% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 12% |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 11% |
load | 12% |
store | 10% |
mul | 12% |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 8% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput | Vectorization |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
SUB $0x88,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
MOV %R9,-0x70(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV %R8,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
MOV %RCX,-0x68(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV %RDX,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
MOV 0x40(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV %RAX,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV 0x38(%RBP),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
MOV 0x30(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV %RAX,-0xa8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV 0x28(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV %RAX,-0x88(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV 0x20(%RBP),%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x18(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV %RAX,-0x78(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV 0x10(%RBP),%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOVL $0,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (6.3%) |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOVQ $0,-0x80(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (6.3%) |
MOVQ $0x1,-0xb0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (6.3%) |
SUB $0x8,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
LEA -0xb0(%RBP),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
LEA 0x117ee(%RIP),%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
LEA -0x50(%RBP),%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
LEA -0x80(%RBP),%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
LEA 0x50(%RBP),%R9 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV %ESI,-0x4c(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (6.3%) |
MOV $0x22,%EDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (6.3%) |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
CALL 40c0 <__kmpc_for_static_init_8@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
ADD $0x20,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
MOV -0x80(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x50(%RBP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV %RAX,-0x60(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
SUB %RAX,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV %RCX,-0x58(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
JAE 8ce9 <hypre_CSRMatrixMatvecOutOfPlace.extracted.19+0xd9> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
LEA 0x117d0(%RIP),%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV -0x4c(%RBP),%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
ADD $0x88,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
JMP 4030 <__kmpc_for_static_fini@plt> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
CMP $0x1,%R15 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JNE 8ef2 <hypre_CSRMatrixMatvecOutOfPlace.extracted.19+0x2e2> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | scal (6.3%) |
LEA 0xb48c(%RIP),%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
JMP 8db4 <hypre_CSRMatrixMatvecOutOfPlace.extracted.19+0x1a4> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
ADD %RAX,%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV 0x30(%R14,%R8,8),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOVSD (%RBX,%RAX,8),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
MULSD 0x30(%R12,%R8,8),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
ADDSD %XMM1,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
MOV 0x28(%R14,%R8,8),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOVSD (%RBX,%RAX,8),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
MULSD 0x28(%R12,%R8,8),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
ADDSD %XMM1,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
MOV 0x20(%R14,%R8,8),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOVSD (%RBX,%RAX,8),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
MULSD 0x20(%R12,%R8,8),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
ADDSD %XMM1,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
MOV 0x18(%R14,%R8,8),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOVSD (%RBX,%RAX,8),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
MULSD 0x18(%R12,%R8,8),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
ADDSD %XMM1,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
MOV 0x10(%R14,%R8,8),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOVSD (%RBX,%RAX,8),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
MULSD 0x10(%R12,%R8,8),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
ADDSD %XMM1,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
MOV 0x8(%R14,%R8,8),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOVSD (%RBX,%RAX,8),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
MULSD 0x8(%R12,%R8,8),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
ADDSD %XMM1,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
MOV (%R14,%R8,8),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOVSD (%RBX,%RAX,8),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
MULSD (%R12,%R8,8),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
ADDSD %XMM1,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
AND $-0x8,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOVSXD (%RSI,%RCX,4),%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
ADD %RSI,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
JMP %RCX | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
ADD %RAX,%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
JMP 8d82 <hypre_CSRMatrixMatvecOutOfPlace.extracted.19+0x172> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
ADD %RAX,%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
JMP 8d6d <hypre_CSRMatrixMatvecOutOfPlace.extracted.19+0x15d> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
ADD %RAX,%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
JMP 8d58 <hypre_CSRMatrixMatvecOutOfPlace.extracted.19+0x148> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
ADD %RAX,%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
JMP 8d43 <hypre_CSRMatrixMatvecOutOfPlace.extracted.19+0x133> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
ADD %RAX,%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
JMP 8d2e <hypre_CSRMatrixMatvecOutOfPlace.extracted.19+0x11e> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
ADD %RAX,%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
JMP 8d19 <hypre_CSRMatrixMatvecOutOfPlace.extracted.19+0x109> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
JL 8cc9 <hypre_CSRMatrixMatvecOutOfPlace.extracted.19+0xb9> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
DEC %R15 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
JMP 8f18 <hypre_CSRMatrixMatvecOutOfPlace.extracted.19+0x308> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
NOP | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
XCHG %AX,%AX | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
NOPL (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
Source file and lines | csr_matvec.c:178-204 |
Module | libseq_mv.so |
nb instructions | 113 |
nb uops | 111 |
loop length | 471 |
used x86 registers | 14 |
used mmx registers | 0 |
used xmm registers | 2 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 20 |
ADD-SUB / MUL ratio | 1.00 |
micro-operation queue | 18.50 cycles |
front end | 18.50 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 8.00 | 8.00 | 8.00 | 8.00 | 7.00 | 15.33 | 15.33 | 15.33 | 3.50 | 3.50 | 3.50 | 3.50 | 0.00 | 0.00 |
cycles | 8.00 | 8.00 | 8.00 | 8.00 | 7.00 | 15.33 | 15.33 | 15.33 | 3.50 | 3.50 | 3.50 | 3.50 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | NA |
Front-end | 18.50 |
Dispatch | 15.33 |
Overall L1 | 18.50 |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 0% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 10% |
load | 12% |
store | 10% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 8% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 12% |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 11% |
load | 12% |
store | 10% |
mul | 12% |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 8% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput | Vectorization |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
SUB $0x88,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
MOV %R9,-0x70(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV %R8,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
MOV %RCX,-0x68(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV %RDX,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
MOV 0x40(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV %RAX,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV 0x38(%RBP),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
MOV 0x30(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV %RAX,-0xa8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV 0x28(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV %RAX,-0x88(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV 0x20(%RBP),%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x18(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV %RAX,-0x78(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV 0x10(%RBP),%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOVL $0,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (6.3%) |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOVQ $0,-0x80(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (6.3%) |
MOVQ $0x1,-0xb0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (6.3%) |
SUB $0x8,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
LEA -0xb0(%RBP),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
LEA 0x117ee(%RIP),%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
LEA -0x50(%RBP),%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
LEA -0x80(%RBP),%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
LEA 0x50(%RBP),%R9 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV %ESI,-0x4c(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (6.3%) |
MOV $0x22,%EDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (6.3%) |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
CALL 40c0 <__kmpc_for_static_init_8@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
ADD $0x20,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
MOV -0x80(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x50(%RBP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV %RAX,-0x60(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
SUB %RAX,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV %RCX,-0x58(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
JAE 8ce9 <hypre_CSRMatrixMatvecOutOfPlace.extracted.19+0xd9> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
LEA 0x117d0(%RIP),%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV -0x4c(%RBP),%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
ADD $0x88,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
JMP 4030 <__kmpc_for_static_fini@plt> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
CMP $0x1,%R15 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JNE 8ef2 <hypre_CSRMatrixMatvecOutOfPlace.extracted.19+0x2e2> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | scal (6.3%) |
LEA 0xb48c(%RIP),%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
JMP 8db4 <hypre_CSRMatrixMatvecOutOfPlace.extracted.19+0x1a4> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
ADD %RAX,%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV 0x30(%R14,%R8,8),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOVSD (%RBX,%RAX,8),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
MULSD 0x30(%R12,%R8,8),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
ADDSD %XMM1,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
MOV 0x28(%R14,%R8,8),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOVSD (%RBX,%RAX,8),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
MULSD 0x28(%R12,%R8,8),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
ADDSD %XMM1,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
MOV 0x20(%R14,%R8,8),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOVSD (%RBX,%RAX,8),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
MULSD 0x20(%R12,%R8,8),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
ADDSD %XMM1,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
MOV 0x18(%R14,%R8,8),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOVSD (%RBX,%RAX,8),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
MULSD 0x18(%R12,%R8,8),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
ADDSD %XMM1,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
MOV 0x10(%R14,%R8,8),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOVSD (%RBX,%RAX,8),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
MULSD 0x10(%R12,%R8,8),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
ADDSD %XMM1,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
MOV 0x8(%R14,%R8,8),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOVSD (%RBX,%RAX,8),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
MULSD 0x8(%R12,%R8,8),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
ADDSD %XMM1,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
MOV (%R14,%R8,8),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOVSD (%RBX,%RAX,8),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
MULSD (%R12,%R8,8),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
ADDSD %XMM1,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
AND $-0x8,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOVSXD (%RSI,%RCX,4),%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
ADD %RSI,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
JMP %RCX | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
ADD %RAX,%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
JMP 8d82 <hypre_CSRMatrixMatvecOutOfPlace.extracted.19+0x172> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
ADD %RAX,%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
JMP 8d6d <hypre_CSRMatrixMatvecOutOfPlace.extracted.19+0x15d> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
ADD %RAX,%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
JMP 8d58 <hypre_CSRMatrixMatvecOutOfPlace.extracted.19+0x148> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
ADD %RAX,%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
JMP 8d43 <hypre_CSRMatrixMatvecOutOfPlace.extracted.19+0x133> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
ADD %RAX,%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
JMP 8d2e <hypre_CSRMatrixMatvecOutOfPlace.extracted.19+0x11e> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
ADD %RAX,%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
JMP 8d19 <hypre_CSRMatrixMatvecOutOfPlace.extracted.19+0x109> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
JL 8cc9 <hypre_CSRMatrixMatvecOutOfPlace.extracted.19+0xb9> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
DEC %R15 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
JMP 8f18 <hypre_CSRMatrixMatvecOutOfPlace.extracted.19+0x308> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
NOP | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
XCHG %AX,%AX | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
NOPL (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
Run 8x1 | Number processes: 8; Number nodes: 1; Run Command: <executable> -n 400 400 400; MPI Command: mpirun -n <number_processes>; Dataset: (empty); Run Directory: /home/eoseret/qaas_runs_CPU_9468/172-019-1763/intel/AMG/run/oneview_runs/multicore/icx_9/oneview_run_1720211326; OMP_PROC_BIND: spread; I_MPI_PIN_DOMAIN: auto:scatter; OMP_PLACES: threads; OMP_NUM_THREADS: 1 |
---|---|
Run 8x2 | Number processes: 8; OMP_NUM_THREADS: 2; OMP_PROC_BIND: spread; I_MPI_PIN_DOMAIN: auto:scatter; OMP_PLACES: threads |
Run 8x4 | Number processes: 8; OMP_NUM_THREADS: 4; OMP_PROC_BIND: spread; I_MPI_PIN_DOMAIN: auto:scatter; OMP_PLACES: threads |
Run 8x8 | Number processes: 8; OMP_NUM_THREADS: 8; OMP_PROC_BIND: spread; I_MPI_PIN_DOMAIN: auto:scatter; OMP_PLACES: threads |
Run 8x16 | Number processes: 8; OMP_NUM_THREADS: 16; OMP_PROC_BIND: spread; I_MPI_PIN_DOMAIN: auto:scatter; OMP_PLACES: threads |
Run 8x24 | Number processes: 8; OMP_NUM_THREADS: 24; OMP_PROC_BIND: spread; I_MPI_PIN_DOMAIN: auto:scatter; OMP_PLACES: threads |
(8x1) Efficiency | (8x1) Potential Speed-Up (%) | (8x2) Efficiency | (8x2) Potential Speed-Up (%) | (8x4) Efficiency | (8x4) Potential Speed-Up (%) | (8x8) Efficiency | (8x8) Potential Speed-Up (%) | (8x16) Efficiency | (8x16) Potential Speed-Up (%) | (8x24) Efficiency | (8x24) Potential Speed-Up (%) |
---|---|---|---|---|---|---|---|---|---|---|---|
1 | 0 | 4.55 | 0 | 6.83 | 0 | 9.35 | 0 | 8.46 | 0 | 6.98 | 0 |
Run | Number of threads | Efficiency (ideal is 1) | Speedup | Ideal Speedup | Time (s) | Coverage (%) |
---|---|---|---|---|---|---|
8x1 | 8 | 1 | 1 | 1 | 0.35499998927116 | 0.085637591779232 |
8x2 | 16 | 4.55 | 4.55 | 2 | 0.10000002384186 | 0.036999501287937 |
8x4 | 32 | 6.83 | 6.83 | 4 | 0.069999992847443 | 0.034758295863867 |
8x8 | 64 | 9.35 | 9.35 | 8 | 0.064999990165234 | 0.034897960722446 |
8x16 | 128 | 8.46 | 8.46 | 16 | 0.074999988079071 | 0.041863787919283 |
8x24 | 192 | 6.98 | 6.98 | 24 | 0.074999995529652 | 0.055595945566893 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼hypre_CSRMatrixMatvecOutOfPlace.extracted.19– | 0.09 | 0.29 |
▼Loop 96 - csr_matvec.c:178-196 - libseq_mv.so– | 0.03 | 0.10 |
○Loop 97 - csr_matvec.c:194-195 - libseq_mv.so | 0.01 | 0.02 |
▼Loop 98 - csr_matvec.c:178-204 - libseq_mv.so– | 0.00 | 0.00 |
▼Loop 99 - csr_matvec.c:199-204 - libseq_mv.so– | 0.10 | 0.35 |
○Loop 100 - csr_matvec.c:202-203 - libseq_mv.so | 0.00 | 0.00 |