Function: hypre_CSRMatrixMatvecT._omp_fn.3 | Module: libseq_mv.so | Source: csr_matvec.c:554-579 [...] | Coverage: 1.29% |
---|
Function: hypre_CSRMatrixMatvecT._omp_fn.3 | Module: libseq_mv.so | Source: csr_matvec.c:554-579 [...] | Coverage: 1.29% |
---|
/home/eoseret/qaas_runs_CPU_9468/172-019-1763/intel/AMG/build/AMG/AMG/seq_mv/csr_matvec.c: 554 - 579 |
-------------------------------------------------------------------------------- |
554: #pragma omp parallel private(i,jj,j,my_thread_num,offset) |
555: #endif |
556: { |
557: my_thread_num = hypre_GetThreadNum(); |
558: offset = y_size*my_thread_num; |
[...] |
564: for (jj = A_i[i]; jj < A_i[i+1]; jj++) |
565: { |
566: j = A_j[jj]; |
567: y_data_expand[offset + j] += A_data[jj] * x_data[i]; |
[...] |
577: for (j = 0; j < num_threads; j++) |
578: { |
579: y_data[i] += y_data_expand[j*y_size + i]; |
0x5170 PUSH %RBP |
0x5171 MOV %RSP,%RBP |
0x5174 PUSH %R15 |
0x5176 PUSH %R14 |
0x5178 PUSH %R13 |
0x517a PUSH %R12 |
0x517c PUSH %RBX |
0x517d SUB $0x38,%RSP |
0x5181 MOV 0x28(%RDI),%RDX |
0x5185 MOV 0x10(%RDI),%RSI |
0x5189 MOV 0x20(%RDI),%R10 |
0x518d MOV 0x30(%RDI),%RAX |
0x5191 MOV 0x18(%RDI),%R15 |
0x5195 MOV 0x40(%RDI),%R13 |
0x5199 MOV %RDX,-0x60(%RBP) |
0x519d MOV 0x38(%RDI),%RBX |
0x51a1 MOV 0x8(%RDI),%R14 |
0x51a5 MOV %RSI,-0x50(%RBP) |
0x51a9 MOV (%RDI),%RDI |
0x51ac MOV %R10,-0x58(%RBP) |
0x51b0 MOV %RAX,-0x40(%RBP) |
0x51b4 MOV %RDI,-0x48(%RBP) |
0x51b8 CALL 2110 <hypre_GetThreadNum@plt> |
0x51bd MOV %RAX,%R12 |
0x51c0 CALL 2050 <omp_get_num_threads@plt> |
0x51c5 CLTQ |
0x51c7 MOV %RAX,-0x38(%RBP) |
0x51cb CALL 2100 <omp_get_thread_num@plt> |
0x51d0 MOV -0x48(%RBP),%R11 |
0x51d4 MOV -0x50(%RBP),%RSI |
0x51d8 MOVSXD %EAX,%R9 |
0x51db MOV %R15,%RAX |
0x51de MOV -0x58(%RBP),%R8 |
0x51e2 CQTO |
0x51e4 IDIVQ -0x38(%RBP) |
0x51e8 CMP %RDX,%R9 |
0x51eb MOV %RAX,%RCX |
0x51ee JL 5630 |
0x51f4 MOV %RCX,%R10 |
0x51f7 IMUL %R9,%R10 |
0x51fb ADD %R10,%RDX |
0x51fe ADD %RDX,%RCX |
0x5201 CMP %RCX,%RDX |
0x5204 JGE 548f |
0x520a MOV -0x40(%RBP),%R15 |
0x520e SAL $0x3,%RDX |
0x5212 LEA (%R8,%RCX,8),%RDI |
0x5216 ADD %RDX,%R14 |
0x5219 ADD %R8,%RDX |
0x521c IMUL %R15,%R12 |
(32) 0x5220 MOV (%R14),%RAX |
(32) 0x5223 MOV 0x8(%R14),%R8 |
(32) 0x5227 CMP %R8,%RAX |
(32) 0x522a JGE 547e |
(32) 0x5230 MOV %R8,%RCX |
(32) 0x5233 SUB %RAX,%RCX |
(32) 0x5236 AND $0x7,%ECX |
(32) 0x5239 JE 5369 |
(32) 0x523f CMP $0x1,%RCX |
(32) 0x5243 JE 533d |
(32) 0x5249 CMP $0x2,%RCX |
(32) 0x524d JE 531c |
(32) 0x5253 CMP $0x3,%RCX |
(32) 0x5257 JE 52f9 |
(32) 0x525d CMP $0x4,%RCX |
(32) 0x5261 JE 52d6 |
(32) 0x5263 CMP $0x5,%RCX |
(32) 0x5267 JE 52b5 |
(32) 0x5269 CMP $0x6,%RCX |
(32) 0x526d JE 5292 |
(32) 0x526f MOV (%RSI,%RAX,8),%R10 |
(32) 0x5273 VMOVSD (%R11,%RAX,8),%XMM0 |
(32) 0x5279 INC %RAX |
(32) 0x527c ADD %R12,%R10 |
(32) 0x527f LEA (%RBX,%R10,8),%R15 |
(32) 0x5283 VMOVSD (%R15),%XMM7 |
(32) 0x5288 VFMADD132SD (%RDX),%XMM7,%XMM0 |
(32) 0x528d VMOVSD %XMM0,(%R15) |
(32) 0x5292 MOV (%RSI,%RAX,8),%RCX |
(32) 0x5296 VMOVSD (%R11,%RAX,8),%XMM1 |
(32) 0x529c INC %RAX |
(32) 0x529f ADD %R12,%RCX |
(32) 0x52a2 LEA (%RBX,%RCX,8),%R10 |
(32) 0x52a6 VMOVSD (%R10),%XMM2 |
(32) 0x52ab VFMADD132SD (%RDX),%XMM2,%XMM1 |
(32) 0x52b0 VMOVSD %XMM1,(%R10) |
(32) 0x52b5 MOV (%RSI,%RAX,8),%R15 |
(32) 0x52b9 VMOVSD (%R11,%RAX,8),%XMM3 |
(32) 0x52bf INC %RAX |
(32) 0x52c2 ADD %R12,%R15 |
(32) 0x52c5 LEA (%RBX,%R15,8),%RCX |
(32) 0x52c9 VMOVSD (%RCX),%XMM6 |
(32) 0x52cd VFMADD132SD (%RDX),%XMM6,%XMM3 |
(32) 0x52d2 VMOVSD %XMM3,(%RCX) |
(32) 0x52d6 MOV (%RSI,%RAX,8),%R10 |
(32) 0x52da VMOVSD (%R11,%RAX,8),%XMM4 |
(32) 0x52e0 INC %RAX |
(32) 0x52e3 ADD %R12,%R10 |
(32) 0x52e6 LEA (%RBX,%R10,8),%R15 |
(32) 0x52ea VMOVSD (%R15),%XMM5 |
(32) 0x52ef VFMADD132SD (%RDX),%XMM5,%XMM4 |
(32) 0x52f4 VMOVSD %XMM4,(%R15) |
(32) 0x52f9 MOV (%RSI,%RAX,8),%RCX |
(32) 0x52fd VMOVSD (%R11,%RAX,8),%XMM8 |
(32) 0x5303 INC %RAX |
(32) 0x5306 ADD %R12,%RCX |
(32) 0x5309 LEA (%RBX,%RCX,8),%R10 |
(32) 0x530d VMOVSD (%R10),%XMM9 |
(32) 0x5312 VFMADD132SD (%RDX),%XMM9,%XMM8 |
(32) 0x5317 VMOVSD %XMM8,(%R10) |
(32) 0x531c MOV (%RSI,%RAX,8),%R15 |
(32) 0x5320 VMOVSD (%R11,%RAX,8),%XMM10 |
(32) 0x5326 INC %RAX |
(32) 0x5329 ADD %R12,%R15 |
(32) 0x532c LEA (%RBX,%R15,8),%RCX |
(32) 0x5330 VMOVSD (%RCX),%XMM11 |
(32) 0x5334 VFMADD132SD (%RDX),%XMM11,%XMM10 |
(32) 0x5339 VMOVSD %XMM10,(%RCX) |
(32) 0x533d MOV (%RSI,%RAX,8),%R10 |
(32) 0x5341 VMOVSD (%R11,%RAX,8),%XMM12 |
(32) 0x5347 INC %RAX |
(32) 0x534a ADD %R12,%R10 |
(32) 0x534d LEA (%RBX,%R10,8),%R15 |
(32) 0x5351 VMOVSD (%R15),%XMM13 |
(32) 0x5356 VFMADD132SD (%RDX),%XMM13,%XMM12 |
(32) 0x535b VMOVSD %XMM12,(%R15) |
(32) 0x5360 CMP %R8,%RAX |
(32) 0x5363 JE 547e |
(33) 0x5369 MOV (%RSI,%RAX,8),%RCX |
(33) 0x536d VMOVSD (%R11,%RAX,8),%XMM14 |
(33) 0x5373 MOV 0x8(%RSI,%RAX,8),%R15 |
(33) 0x5378 ADD %R12,%RCX |
(33) 0x537b LEA (%RBX,%RCX,8),%R10 |
(33) 0x537f ADD %R12,%R15 |
(33) 0x5382 VMOVSD (%R10),%XMM15 |
(33) 0x5387 VFMADD132SD (%RDX),%XMM15,%XMM14 |
(33) 0x538c LEA (%RBX,%R15,8),%RCX |
(33) 0x5390 VMOVSD %XMM14,(%R10) |
(33) 0x5395 VMOVSD 0x8(%R11,%RAX,8),%XMM0 |
(33) 0x539c VMOVSD (%RCX),%XMM7 |
(33) 0x53a0 VFMADD132SD (%RDX),%XMM7,%XMM0 |
(33) 0x53a5 MOV 0x10(%RSI,%RAX,8),%R10 |
(33) 0x53aa ADD %R12,%R10 |
(33) 0x53ad LEA (%RBX,%R10,8),%R15 |
(33) 0x53b1 VMOVSD %XMM0,(%RCX) |
(33) 0x53b5 VMOVSD 0x10(%R11,%RAX,8),%XMM1 |
(33) 0x53bc VMOVSD (%R15),%XMM2 |
(33) 0x53c1 VFMADD132SD (%RDX),%XMM2,%XMM1 |
(33) 0x53c6 MOV 0x18(%RSI,%RAX,8),%RCX |
(33) 0x53cb ADD %R12,%RCX |
(33) 0x53ce LEA (%RBX,%RCX,8),%R10 |
(33) 0x53d2 VMOVSD %XMM1,(%R15) |
(33) 0x53d7 VMOVSD (%R10),%XMM6 |
(33) 0x53dc MOV 0x20(%RSI,%RAX,8),%R15 |
(33) 0x53e1 VMOVSD 0x18(%R11,%RAX,8),%XMM3 |
(33) 0x53e8 VFMADD132SD (%RDX),%XMM6,%XMM3 |
(33) 0x53ed ADD %R12,%R15 |
(33) 0x53f0 LEA (%RBX,%R15,8),%RCX |
(33) 0x53f4 VMOVSD %XMM3,(%R10) |
(33) 0x53f9 VMOVSD 0x20(%R11,%RAX,8),%XMM4 |
(33) 0x5400 VMOVSD (%RCX),%XMM5 |
(33) 0x5404 VFMADD132SD (%RDX),%XMM5,%XMM4 |
(33) 0x5409 MOV 0x28(%RSI,%RAX,8),%R10 |
(33) 0x540e ADD %R12,%R10 |
(33) 0x5411 LEA (%RBX,%R10,8),%R15 |
(33) 0x5415 VMOVSD %XMM4,(%RCX) |
(33) 0x5419 VMOVSD 0x28(%R11,%RAX,8),%XMM8 |
(33) 0x5420 VMOVSD (%R15),%XMM9 |
(33) 0x5425 VFMADD132SD (%RDX),%XMM9,%XMM8 |
(33) 0x542a MOV 0x30(%RSI,%RAX,8),%RCX |
(33) 0x542f ADD %R12,%RCX |
(33) 0x5432 LEA (%RBX,%RCX,8),%R10 |
(33) 0x5436 VMOVSD %XMM8,(%R15) |
(33) 0x543b VMOVSD 0x30(%R11,%RAX,8),%XMM10 |
(33) 0x5442 VMOVSD (%R10),%XMM11 |
(33) 0x5447 VFMADD132SD (%RDX),%XMM11,%XMM10 |
(33) 0x544c MOV 0x38(%RSI,%RAX,8),%R15 |
(33) 0x5451 ADD %R12,%R15 |
(33) 0x5454 LEA (%RBX,%R15,8),%RCX |
(33) 0x5458 VMOVSD %XMM10,(%R10) |
(33) 0x545d VMOVSD 0x38(%R11,%RAX,8),%XMM12 |
(33) 0x5464 VMOVSD (%RCX),%XMM13 |
(33) 0x5468 ADD $0x8,%RAX |
(33) 0x546c VFMADD132SD (%RDX),%XMM13,%XMM12 |
(33) 0x5471 VMOVSD %XMM12,(%RCX) |
(33) 0x5475 CMP %R8,%RAX |
(33) 0x5478 JNE 5369 |
(32) 0x547e ADD $0x8,%RDX |
(32) 0x5482 ADD $0x8,%R14 |
(32) 0x5486 CMP %RDX,%RDI |
(32) 0x5489 JNE 5220 |
0x548f MOV %R9,-0x48(%RBP) |
0x5493 CALL 2150 <GOMP_barrier@plt> |
0x5498 MOV -0x40(%RBP),%RAX |
0x549c MOV -0x48(%RBP),%R14 |
0x54a0 CQTO |
0x54a2 IDIVQ -0x38(%RBP) |
0x54a6 CMP %RDX,%R14 |
0x54a9 JL 5626 |
0x54af IMUL %RAX,%R14 |
0x54b3 ADD %R14,%RDX |
0x54b6 ADD %RDX,%RAX |
0x54b9 CMP %RAX,%RDX |
0x54bc JGE 5613 |
0x54c2 TEST %R13,%R13 |
0x54c5 JLE 5613 |
0x54cb MOV -0x60(%RBP),%R11 |
0x54cf LEA (,%RDX,8),%R12 |
0x54d7 ADD %R12,%R11 |
0x54da ADD %RBX,%R12 |
0x54dd MOV -0x40(%RBP),%RBX |
0x54e1 SAL $0x3,%RBX |
0x54e5 NOPL (%RAX) |
(31) 0x54e8 MOV %R13,%R9 |
(31) 0x54eb VMOVSD (%R11),%XMM14 |
(31) 0x54f0 MOV %R12,%RDI |
(31) 0x54f3 XOR %ESI,%ESI |
(31) 0x54f5 AND $0x7,%R9D |
(31) 0x54f9 JE 5596 |
(31) 0x54ff CMP $0x1,%R9 |
(31) 0x5503 JE 5582 |
(31) 0x5505 CMP $0x2,%R9 |
(31) 0x5509 JE 5573 |
(31) 0x550b CMP $0x3,%R9 |
(31) 0x550f JE 5564 |
(31) 0x5511 CMP $0x4,%R9 |
(31) 0x5515 JE 5555 |
(31) 0x5517 CMP $0x5,%R9 |
(31) 0x551b JE 5546 |
(31) 0x551d CMP $0x6,%R9 |
(31) 0x5521 JE 5537 |
(31) 0x5523 VADDSD (%R12),%XMM14,%XMM14 |
(31) 0x5529 MOV $0x1,%ESI |
(31) 0x552e LEA (%R12,%RBX,1),%RDI |
(31) 0x5532 VMOVSD %XMM14,(%R11) |
(31) 0x5537 VADDSD (%RDI),%XMM14,%XMM14 |
(31) 0x553b INC %RSI |
(31) 0x553e ADD %RBX,%RDI |
(31) 0x5541 VMOVSD %XMM14,(%R11) |
(31) 0x5546 VADDSD (%RDI),%XMM14,%XMM14 |
(31) 0x554a INC %RSI |
(31) 0x554d ADD %RBX,%RDI |
(31) 0x5550 VMOVSD %XMM14,(%R11) |
(31) 0x5555 VADDSD (%RDI),%XMM14,%XMM14 |
(31) 0x5559 INC %RSI |
(31) 0x555c ADD %RBX,%RDI |
(31) 0x555f VMOVSD %XMM14,(%R11) |
(31) 0x5564 VADDSD (%RDI),%XMM14,%XMM14 |
(31) 0x5568 INC %RSI |
(31) 0x556b ADD %RBX,%RDI |
(31) 0x556e VMOVSD %XMM14,(%R11) |
(31) 0x5573 VADDSD (%RDI),%XMM14,%XMM14 |
(31) 0x5577 INC %RSI |
(31) 0x557a ADD %RBX,%RDI |
(31) 0x557d VMOVSD %XMM14,(%R11) |
(31) 0x5582 VADDSD (%RDI),%XMM14,%XMM14 |
(31) 0x5586 INC %RSI |
(31) 0x5589 ADD %RBX,%RDI |
(31) 0x558c VMOVSD %XMM14,(%R11) |
(31) 0x5591 CMP %RSI,%R13 |
(31) 0x5594 JE 55ff |
(30) 0x5596 VADDSD (%RDI),%XMM14,%XMM15 |
(30) 0x559a ADD %RBX,%RDI |
(30) 0x559d ADD $0x8,%RSI |
(30) 0x55a1 VMOVSD %XMM15,(%R11) |
(30) 0x55a6 VADDSD (%RDI),%XMM15,%XMM0 |
(30) 0x55aa ADD %RBX,%RDI |
(30) 0x55ad VMOVSD %XMM0,(%R11) |
(30) 0x55b2 VADDSD (%RDI),%XMM0,%XMM7 |
(30) 0x55b6 ADD %RBX,%RDI |
(30) 0x55b9 VMOVSD %XMM7,(%R11) |
(30) 0x55be VADDSD (%RDI),%XMM7,%XMM1 |
(30) 0x55c2 ADD %RBX,%RDI |
(30) 0x55c5 VMOVSD %XMM1,(%R11) |
(30) 0x55ca VADDSD (%RDI),%XMM1,%XMM2 |
(30) 0x55ce ADD %RBX,%RDI |
(30) 0x55d1 VMOVSD %XMM2,(%R11) |
(30) 0x55d6 VADDSD (%RDI),%XMM2,%XMM3 |
(30) 0x55da ADD %RBX,%RDI |
(30) 0x55dd VMOVSD %XMM3,(%R11) |
(30) 0x55e2 VADDSD (%RDI),%XMM3,%XMM6 |
(30) 0x55e6 ADD %RBX,%RDI |
(30) 0x55e9 VMOVSD %XMM6,(%R11) |
(30) 0x55ee VADDSD (%RDI),%XMM6,%XMM14 |
(30) 0x55f2 ADD %RBX,%RDI |
(30) 0x55f5 VMOVSD %XMM14,(%R11) |
(30) 0x55fa CMP %RSI,%R13 |
(30) 0x55fd JNE 5596 |
(31) 0x55ff INC %RDX |
(31) 0x5602 ADD $0x8,%R11 |
(31) 0x5606 ADD $0x8,%R12 |
(31) 0x560a CMP %RDX,%RAX |
(31) 0x560d JNE 54e8 |
0x5613 ADD $0x38,%RSP |
0x5617 POP %RBX |
0x5618 POP %R12 |
0x561a POP %R13 |
0x561c POP %R14 |
0x561e POP %R15 |
0x5620 POP %RBP |
0x5621 JMP 2150 |
0x5626 INC %RAX |
0x5629 XOR %EDX,%EDX |
0x562b JMP 54af |
0x5630 INC %RCX |
0x5633 XOR %EDX,%EDX |
0x5635 JMP 51f4 |
0x563a NOPW (%RAX,%RAX,1) |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
○95.72 | gomp_thread_start | team.c:130 | libgomp.so.1.0.0 |
○4.28 | GOMP_parallel | libgomp.h:985 | libgomp.so.1.0.0 |
Path / |
Source file and lines | csr_matvec.c:554-579 |
Module | libseq_mv.so |
nb instructions | 87 |
nb uops | 90 |
loop length | 310 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 6 |
micro-operation queue | 15.00 cycles |
front end | 15.00 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 8.25 | 8.25 | 8.25 | 8.25 | 8.00 | 10.00 | 10.00 | 10.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
cycles | 8.25 | 8.25 | 8.25 | 8.25 | 8.00 | 10.00 | 10.00 | 10.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | 14.00-24.00 |
Front-end | 15.00 |
Dispatch | 10.00 |
DIV/SQRT | 14.00-24.00 |
Overall L1 | 15.00-24.00 |
all | 0% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 0% |
all | 12% |
load | 12% |
store | 12% |
mul | 12% |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 12% |
other | 12% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput | Vectorization |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
SUB $0x38,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
MOV 0x28(%RDI),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x10(%RDI),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
MOV 0x20(%RDI),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
MOV 0x30(%RDI),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x18(%RDI),%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
MOV 0x40(%RDI),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
MOV %RDX,-0x60(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV 0x38(%RDI),%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
MOV 0x8(%RDI),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
MOV %RSI,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV (%RDI),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV %R10,-0x58(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV %RAX,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV %RDI,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
CALL 2110 <hypre_GetThreadNum@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
MOV %RAX,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | scal (12.5%) |
CALL 2050 <omp_get_num_threads@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
CLTQ | scal (12.5%) | |||||||||||||||||
MOV %RAX,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
CALL 2100 <omp_get_thread_num@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
MOV -0x48(%RBP),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
MOV -0x50(%RBP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
MOVSXD %EAX,%R9 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
MOV %R15,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
MOV -0x58(%RBP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
CQTO | scal (12.5%) | |||||||||||||||||
IDIVQ -0x38(%RBP) | 3 | 2 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 10-22 | 7-12 | scal (12.5%) |
CMP %RDX,%R9 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
MOV %RAX,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
JL 5630 <hypre_CSRMatrixMatvecT._omp_fn.3+0x4c0> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV %RCX,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | scal (12.5%) |
IMUL %R9,%R10 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | scal (12.5%) |
ADD %R10,%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
ADD %RDX,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
CMP %RCX,%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JGE 548f <hypre_CSRMatrixMatvecT._omp_fn.3+0x31f> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV -0x40(%RBP),%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
SAL $0x3,%RDX | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
LEA (%R8,%RCX,8),%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
ADD %RDX,%R14 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
ADD %R8,%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
IMUL %R15,%R12 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | scal (12.5%) |
MOV %R9,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
CALL 2150 <GOMP_barrier@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
MOV -0x40(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV -0x48(%RBP),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
CQTO | scal (12.5%) | |||||||||||||||||
IDIVQ -0x38(%RBP) | 3 | 2 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 10-22 | 7-12 | scal (12.5%) |
CMP %RDX,%R14 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JL 5626 <hypre_CSRMatrixMatvecT._omp_fn.3+0x4b6> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
IMUL %RAX,%R14 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | scal (12.5%) |
ADD %R14,%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
ADD %RDX,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
CMP %RAX,%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JGE 5613 <hypre_CSRMatrixMatvecT._omp_fn.3+0x4a3> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
TEST %R13,%R13 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JLE 5613 <hypre_CSRMatrixMatvecT._omp_fn.3+0x4a3> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV -0x60(%RBP),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
LEA (,%RDX,8),%R12 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
ADD %R12,%R11 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
ADD %RBX,%R12 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
MOV -0x40(%RBP),%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
SAL $0x3,%RBX | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
ADD $0x38,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
JMP 2150 <GOMP_barrier@plt> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
INC %RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
JMP 54af <hypre_CSRMatrixMatvecT._omp_fn.3+0x33f> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
INC %RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
JMP 51f4 <hypre_CSRMatrixMatvecT._omp_fn.3+0x84> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
NOPW (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
Source file and lines | csr_matvec.c:554-579 |
Module | libseq_mv.so |
nb instructions | 87 |
nb uops | 90 |
loop length | 310 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 6 |
micro-operation queue | 15.00 cycles |
front end | 15.00 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 8.25 | 8.25 | 8.25 | 8.25 | 8.00 | 10.00 | 10.00 | 10.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
cycles | 8.25 | 8.25 | 8.25 | 8.25 | 8.00 | 10.00 | 10.00 | 10.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | 14.00-24.00 |
Front-end | 15.00 |
Dispatch | 10.00 |
DIV/SQRT | 14.00-24.00 |
Overall L1 | 15.00-24.00 |
all | 0% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 0% |
all | 12% |
load | 12% |
store | 12% |
mul | 12% |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 12% |
other | 12% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput | Vectorization |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
SUB $0x38,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
MOV 0x28(%RDI),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x10(%RDI),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
MOV 0x20(%RDI),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
MOV 0x30(%RDI),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x18(%RDI),%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
MOV 0x40(%RDI),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
MOV %RDX,-0x60(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV 0x38(%RDI),%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
MOV 0x8(%RDI),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
MOV %RSI,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV (%RDI),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV %R10,-0x58(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV %RAX,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV %RDI,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
CALL 2110 <hypre_GetThreadNum@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
MOV %RAX,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | scal (12.5%) |
CALL 2050 <omp_get_num_threads@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
CLTQ | scal (12.5%) | |||||||||||||||||
MOV %RAX,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
CALL 2100 <omp_get_thread_num@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
MOV -0x48(%RBP),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
MOV -0x50(%RBP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
MOVSXD %EAX,%R9 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
MOV %R15,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
MOV -0x58(%RBP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
CQTO | scal (12.5%) | |||||||||||||||||
IDIVQ -0x38(%RBP) | 3 | 2 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 10-22 | 7-12 | scal (12.5%) |
CMP %RDX,%R9 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
MOV %RAX,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
JL 5630 <hypre_CSRMatrixMatvecT._omp_fn.3+0x4c0> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV %RCX,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | scal (12.5%) |
IMUL %R9,%R10 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | scal (12.5%) |
ADD %R10,%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
ADD %RDX,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
CMP %RCX,%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JGE 548f <hypre_CSRMatrixMatvecT._omp_fn.3+0x31f> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV -0x40(%RBP),%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
SAL $0x3,%RDX | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
LEA (%R8,%RCX,8),%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
ADD %RDX,%R14 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
ADD %R8,%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
IMUL %R15,%R12 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | scal (12.5%) |
MOV %R9,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
CALL 2150 <GOMP_barrier@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
MOV -0x40(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV -0x48(%RBP),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
CQTO | scal (12.5%) | |||||||||||||||||
IDIVQ -0x38(%RBP) | 3 | 2 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 10-22 | 7-12 | scal (12.5%) |
CMP %RDX,%R14 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JL 5626 <hypre_CSRMatrixMatvecT._omp_fn.3+0x4b6> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
IMUL %RAX,%R14 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | scal (12.5%) |
ADD %R14,%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
ADD %RDX,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
CMP %RAX,%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JGE 5613 <hypre_CSRMatrixMatvecT._omp_fn.3+0x4a3> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
TEST %R13,%R13 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JLE 5613 <hypre_CSRMatrixMatvecT._omp_fn.3+0x4a3> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV -0x60(%RBP),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
LEA (,%RDX,8),%R12 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
ADD %R12,%R11 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
ADD %RBX,%R12 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
MOV -0x40(%RBP),%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
SAL $0x3,%RBX | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
ADD $0x38,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
JMP 2150 <GOMP_barrier@plt> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
INC %RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
JMP 54af <hypre_CSRMatrixMatvecT._omp_fn.3+0x33f> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
INC %RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
JMP 51f4 <hypre_CSRMatrixMatvecT._omp_fn.3+0x84> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
NOPW (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
Name | Coverage (%) | Time (s) |
---|---|---|
▼hypre_CSRMatrixMatvecT._omp_fn.3– | 1.29 | 0.96 |
▼Loop 31 - csr_matvec.c:577-579 - libseq_mv.so– | 0.00 | 0.00 |
○Loop 30 - csr_matvec.c:577-579 - libseq_mv.so | 0.03 | 0.02 |
▼Loop 32 - csr_matvec.c:564-567 - libseq_mv.so– | 0.00 | 0.00 |
○Loop 33 - csr_matvec.c:564-567 - libseq_mv.so | 0.04 | 0.03 |