Function: hypre_CSRMatrixMatvecT | Module: exec | Source: csr_matvec.c:445-648 [...] | Coverage: 2.02% |
---|
Function: hypre_CSRMatrixMatvecT | Module: exec | Source: csr_matvec.c:445-648 [...] | Coverage: 2.02% |
---|
/home/kcamus/qaas_runs/169-443-9681/intel/AMG/build/AMG/AMG/seq_mv/csr_matvec.c: 445 - 648 |
-------------------------------------------------------------------------------- |
445: { |
446: HYPRE_Complex *A_data = hypre_CSRMatrixData(A); |
447: HYPRE_Int *A_i = hypre_CSRMatrixI(A); |
448: HYPRE_Int *A_j = hypre_CSRMatrixJ(A); |
449: HYPRE_Int num_rows = hypre_CSRMatrixNumRows(A); |
450: HYPRE_Int num_cols = hypre_CSRMatrixNumCols(A); |
451: |
452: HYPRE_Complex *x_data = hypre_VectorData(x); |
453: HYPRE_Complex *y_data = hypre_VectorData(y); |
454: HYPRE_Int x_size = hypre_VectorSize(x); |
455: HYPRE_Int y_size = hypre_VectorSize(y); |
456: HYPRE_Int num_vectors = hypre_VectorNumVectors(x); |
457: HYPRE_Int idxstride_y = hypre_VectorIndexStride(y); |
458: HYPRE_Int vecstride_y = hypre_VectorVectorStride(y); |
459: HYPRE_Int idxstride_x = hypre_VectorIndexStride(x); |
460: HYPRE_Int vecstride_x = hypre_VectorVectorStride(x); |
[...] |
485: hypre_assert( num_vectors == hypre_VectorNumVectors(y) ); |
486: |
487: if (num_rows != x_size) |
488: ierr = 1; |
489: |
490: if (num_cols != y_size) |
491: ierr = 2; |
492: |
493: if (num_rows != x_size && num_cols != y_size) |
[...] |
499: if (alpha == 0.0) |
500: { |
501: #ifdef HYPRE_USING_OPENMP |
502: #pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE |
503: #endif |
504: for (i = 0; i < num_cols*num_vectors; i++) |
[...] |
510: if (x == y) |
511: { |
512: x_tmp = hypre_SeqVectorCloneDeep(x); |
513: x_data = hypre_VectorData(x_tmp); |
[...] |
520: temp = beta / alpha; |
521: |
522: if (temp != 1.0) |
523: { |
524: if (temp == 0.0) |
525: { |
526: #ifdef HYPRE_USING_OPENMP |
527: #pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE |
528: #endif |
529: for (i = 0; i < num_cols*num_vectors; i++) |
[...] |
535: #pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE |
536: #endif |
537: for (i = 0; i < num_cols*num_vectors; i++) |
[...] |
545: num_threads = hypre_NumThreads(); |
546: if (num_threads > 1) |
547: { |
548: y_data_expand = hypre_CTAlloc(HYPRE_Complex, num_threads*y_size); |
549: |
550: if ( num_vectors==1 ) |
551: { |
552: |
553: #ifdef HYPRE_USING_OPENMP |
554: #pragma omp parallel private(i,jj,j,my_thread_num,offset) |
[...] |
589: for (i = 0; i < num_rows; i++) |
590: { |
591: for ( jv=0; jv<num_vectors; ++jv ) |
592: { |
593: for (jj = A_i[i]; jj < A_i[i+1]; jj++) |
594: { |
595: j = A_j[jj]; |
596: y_data[ j*idxstride_y + jv*vecstride_y ] += |
597: A_data[jj] * x_data[ i*idxstride_x + jv*vecstride_x]; |
[...] |
603: hypre_TFree(y_data_expand); |
604: |
605: } |
606: else |
607: { |
608: for (i = 0; i < num_rows; i++) |
609: { |
610: if ( num_vectors==1 ) |
611: { |
612: for (jj = A_i[i]; jj < A_i[i+1]; jj++) |
613: { |
614: j = A_j[jj]; |
615: y_data[j] += A_data[jj] * x_data[i]; |
616: } |
617: } |
618: else |
619: { |
620: for ( jv=0; jv<num_vectors; ++jv ) |
621: { |
622: for (jj = A_i[i]; jj < A_i[i+1]; jj++) |
623: { |
624: j = A_j[jj]; |
625: y_data[ j*idxstride_y + jv*vecstride_y ] += |
626: A_data[jj] * x_data[ i*idxstride_x + jv*vecstride_x ]; |
[...] |
636: if (alpha != 1.0) |
637: { |
638: #ifdef HYPRE_USING_OPENMP |
639: #pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE |
640: #endif |
641: for (i = 0; i < num_cols*num_vectors; i++) |
642: y_data[i] *= alpha; |
643: } |
644: |
645: if (x == y) hypre_SeqVectorDestroy(x_tmp); |
646: |
647: return ierr; |
648: } |
0x4d3ad0 PUSH %RBP |
0x4d3ad1 MOV %RSP,%RBP |
0x4d3ad4 PUSH %R15 |
0x4d3ad6 PUSH %R14 |
0x4d3ad8 PUSH %R13 |
0x4d3ada PUSH %R12 |
0x4d3adc PUSH %RBX |
0x4d3add SUB $0xb8,%RSP |
0x4d3ae4 MOV %RSI,%RCX |
0x4d3ae7 MOV 0x30(%RDI),%R12 |
0x4d3aeb MOV (%RDI),%RAX |
0x4d3aee MOV %RAX,-0x48(%RBP) |
0x4d3af2 MOV 0x8(%RDI),%R13 |
0x4d3af6 MOV 0x10(%RDI),%RAX |
0x4d3afa MOV %RAX,-0x40(%RBP) |
0x4d3afe MOV 0x18(%RDI),%RAX |
0x4d3b02 MOV %RAX,-0x90(%RBP) |
0x4d3b09 MOV (%RSI),%R15 |
0x4d3b0c MOV 0x8(%RSI),%RAX |
0x4d3b10 MOV %RAX,-0xc8(%RBP) |
0x4d3b17 MOV (%RDX),%RBX |
0x4d3b1a MOV 0x8(%RDX),%RAX |
0x4d3b1e MOV %RAX,-0x60(%RBP) |
0x4d3b22 MOV 0x18(%RSI),%RAX |
0x4d3b26 MOV 0x30(%RDX),%R14 |
0x4d3b2a MOV 0x28(%RDX),%RSI |
0x4d3b2e MOV %RSI,-0x80(%RBP) |
0x4d3b32 MOV 0x30(%RCX),%RSI |
0x4d3b36 MOV %RSI,-0xd0(%RBP) |
0x4d3b3d MOV 0x28(%RCX),%RSI |
0x4d3b41 MOV %RSI,-0x78(%RBP) |
0x4d3b45 MOV %RAX,-0x38(%RBP) |
0x4d3b49 MOV %RDX,-0xc0(%RBP) |
0x4d3b50 CMP 0x18(%RDX),%RAX |
0x4d3b54 JE 4d3ba6 |
0x4d3b56 MOV 0x59063(%RIP),%RDI |
0x4d3b5d MOV $0x505dec,%ESI |
0x4d3b62 MOV $0x5084a8,%EDX |
0x4d3b67 XOR %EAX,%EAX |
0x4d3b69 MOV %RCX,-0x88(%RBP) |
0x4d3b70 VMOVSD %XMM0,-0x58(%RBP) |
0x4d3b75 VMOVSD %XMM1,-0x30(%RBP) |
0x4d3b7a CALL 4ddb80 <hypre_fprintf> |
0x4d3b7f MOV $0x5084d1,%EDI |
0x4d3b84 MOV $0x1e5,%ESI |
0x4d3b89 MOV $0x1,%EDX |
0x4d3b8e XOR %ECX,%ECX |
0x4d3b90 CALL 4dfac0 <hypre_error_handler> |
0x4d3b95 VMOVSD -0x30(%RBP),%XMM1 |
0x4d3b9a VMOVSD -0x58(%RBP),%XMM0 |
0x4d3b9f MOV -0x88(%RBP),%RCX |
0x4d3ba6 VXORPD %XMM2,%XMM2,%XMM2 |
0x4d3baa VUCOMISD %XMM2,%XMM0 |
0x4d3bae JNE 4d3c06 |
0x4d3bb0 MOV -0x90(%RBP),%R15 |
0x4d3bb7 MOV -0x38(%RBP),%R10 |
0x4d3bbb IMUL %R15,%R10 |
0x4d3bbf TEST %R10,%R10 |
0x4d3bc2 MOV -0xc8(%RBP),%R14 |
0x4d3bc9 MOV -0x60(%RBP),%R12 |
0x4d3bcd JLE 4d4308 |
0x4d3bd3 DEC %R10 |
0x4d3bd6 VMOVQ %XMM1,%RCX |
0x4d3bdb SUB $0x8,%RSP |
0x4d3bdf MOV $0x52c130,%EDI |
0x4d3be4 MOV $0x4d6350,%EDX |
0x4d3be9 MOV $0x4,%ESI |
0x4d3bee MOV %RBX,%R8 |
0x4d3bf1 XOR %R9D,%R9D |
0x4d3bf4 XOR %EAX,%EAX |
0x4d3bf6 PUSH %R10 |
0x4d3bf8 CALL 410390 <__kmpc_fork_call@plt> |
0x4d3bfd ADD $0x10,%RSP |
0x4d3c01 JMP 4d4308 |
0x4d3c06 CMP -0xc0(%RBP),%RCX |
0x4d3c0d MOV %RCX,-0x88(%RBP) |
0x4d3c14 VMOVSD %XMM0,-0x58(%RBP) |
0x4d3c19 JE 4d3c37 |
0x4d3c1b XOR %EAX,%EAX |
0x4d3c1d MOV %RAX,-0xb8(%RBP) |
0x4d3c24 VDIVSD %XMM0,%XMM1,%XMM0 |
0x4d3c28 VUCOMISD 0x213d8(%RIP),%XMM0 |
0x4d3c30 JNE 4d3c69 |
0x4d3c32 JMP 4d3cdb |
0x4d3c37 MOV %RCX,%RDI |
0x4d3c3a VMOVSD %XMM1,-0x30(%RBP) |
0x4d3c3f CALL 4d75f0 <hypre_SeqVectorCloneDeep> |
0x4d3c44 VMOVSD -0x30(%RBP),%XMM1 |
0x4d3c49 VMOVSD -0x58(%RBP),%XMM0 |
0x4d3c4e MOV %RAX,%RCX |
0x4d3c51 MOV %RAX,-0xb8(%RBP) |
0x4d3c58 MOV (%RAX),%R15 |
0x4d3c5b VDIVSD %XMM0,%XMM1,%XMM0 |
0x4d3c5f VUCOMISD 0x213a1(%RIP),%XMM0 |
0x4d3c67 JE 4d3cdb |
0x4d3c69 MOV -0x38(%RBP),%R10 |
0x4d3c6d IMUL -0x90(%RBP),%R10 |
0x4d3c75 VXORPD %XMM1,%XMM1,%XMM1 |
0x4d3c79 VUCOMISD %XMM1,%XMM0 |
0x4d3c7d JNE 4d3ca8 |
0x4d3c7f TEST %R10,%R10 |
0x4d3c82 JLE 4d3cdb |
0x4d3c84 DEC %R10 |
0x4d3c87 MOV $0x52c0d0,%EDI |
0x4d3c8c MOV $0x4d62c0,%EDX |
0x4d3c91 MOV $0x3,%ESI |
0x4d3c96 MOV %RBX,%RCX |
0x4d3c99 XOR %R8D,%R8D |
0x4d3c9c MOV %R10,%R9 |
0x4d3c9f XOR %EAX,%EAX |
0x4d3ca1 CALL 410390 <__kmpc_fork_call@plt> |
0x4d3ca6 JMP 4d3cdb |
0x4d3ca8 TEST %R10,%R10 |
0x4d3cab JLE 4d3cdb |
0x4d3cad DEC %R10 |
0x4d3cb0 VMOVQ %XMM0,%R8 |
0x4d3cb5 SUB $0x8,%RSP |
0x4d3cb9 MOV $0x52c070,%EDI |
0x4d3cbe MOV $0x4d61d0,%EDX |
0x4d3cc3 MOV $0x4,%ESI |
0x4d3cc8 MOV %RBX,%RCX |
0x4d3ccb XOR %R9D,%R9D |
0x4d3cce XOR %EAX,%EAX |
0x4d3cd0 PUSH %R10 |
0x4d3cd2 CALL 410390 <__kmpc_fork_call@plt> |
0x4d3cd7 ADD $0x10,%RSP |
0x4d3cdb CALL 4df9b0 <hypre_NumThreads> |
0x4d3ce0 CMP $0x1,%RAX |
0x4d3ce4 JLE 4d3d51 |
0x4d3ce6 MOV %RAX,-0x30(%RBP) |
0x4d3cea MOV %RAX,%RDI |
0x4d3ced IMUL -0x60(%RBP),%RDI |
0x4d3cf2 MOV $0x8,%ESI |
0x4d3cf7 CALL 4dd8f0 <hypre_CAlloc> |
0x4d3cfc MOV %RAX,%R10 |
0x4d3cff CMPQ $0x1,-0x38(%RBP) |
0x4d3d04 MOV %RAX,-0x70(%RBP) |
0x4d3d08 JNE 4d3e79 |
0x4d3d0e MOV $0x52c010,%EDI |
0x4d3d13 MOV $0x4d5d20,%EDX |
0x4d3d18 MOV $0x9,%ESI |
0x4d3d1d MOV %R12,%RCX |
0x4d3d20 MOV -0x48(%RBP),%R8 |
0x4d3d24 MOV %R13,%R9 |
0x4d3d27 XOR %EAX,%EAX |
0x4d3d29 PUSHQ -0x30(%RBP) |
0x4d3d2c PUSH %R10 |
0x4d3d2e PUSHQ -0x60(%RBP) |
0x4d3d31 PUSH %RBX |
0x4d3d32 PUSH %R15 |
0x4d3d34 PUSHQ -0x40(%RBP) |
0x4d3d37 CALL 410390 <__kmpc_fork_call@plt> |
0x4d3d3c MOV -0x70(%RBP),%R10 |
0x4d3d40 ADD $0x30,%RSP |
0x4d3d44 MOV %R10,%RDI |
0x4d3d47 CALL 4dd9d0 <hypre_Free> |
0x4d3d4c JMP 4d4290 |
0x4d3d51 CMPQ $0,-0x40(%RBP) |
0x4d3d56 JLE 4d4290 |
0x4d3d5c CMPQ $0x1,-0x38(%RBP) |
0x4d3d61 JNE 4d4090 |
0x4d3d67 MOV -0x40(%RBP),%RAX |
0x4d3d6b LEA -0x1(%RAX),%R8 |
0x4d3d6f MOV -0x48(%RBP),%RAX |
0x4d3d73 MOV (%RAX),%R10 |
0x4d3d76 XOR %EDX,%EDX |
0x4d3d78 JMP 4d3d90 |
0x4d3d7a NOPW (%RAX,%RAX,1) |
(3936) 0x4d3d80 LEA 0x1(%RDX),%RAX |
(3936) 0x4d3d84 CMP %R8,%RDX |
(3936) 0x4d3d87 MOV %RAX,%RDX |
(3936) 0x4d3d8a JE 4d4290 |
(3936) 0x4d3d90 MOV %R10,%RSI |
(3936) 0x4d3d93 MOV -0x48(%RBP),%RAX |
(3936) 0x4d3d97 MOV 0x8(%RAX,%RDX,8),%R10 |
(3936) 0x4d3d9c MOV %R10,%R9 |
(3936) 0x4d3d9f SUB %RSI,%R9 |
(3936) 0x4d3da2 JLE 4d3d80 |
(3936) 0x4d3da4 CMP $0x4,%R9 |
(3936) 0x4d3da8 JB 4d3e3c |
(3936) 0x4d3dae MOV %R9,%RDI |
(3936) 0x4d3db1 SHR $0x2,%RDI |
(3936) 0x4d3db5 LEA 0x18(,%RSI,8),%RAX |
(3936) 0x4d3dbd NOPL (%RAX) |
(3938) 0x4d3dc0 VMOVSD (%R15,%RDX,8),%XMM0 |
(3938) 0x4d3dc6 VMOVSD -0x18(%R12,%RAX,1),%XMM1 |
(3938) 0x4d3dcd MOV -0x18(%R13,%RAX,1),%RCX |
(3938) 0x4d3dd2 VFMADD213SD (%RBX,%RCX,8),%XMM0,%XMM1 |
(3938) 0x4d3dd8 VMOVSD %XMM1,(%RBX,%RCX,8) |
(3938) 0x4d3ddd MOV -0x10(%R13,%RAX,1),%RCX |
(3938) 0x4d3de2 VMOVSD (%R15,%RDX,8),%XMM0 |
(3938) 0x4d3de8 VMOVSD -0x10(%R12,%RAX,1),%XMM1 |
(3938) 0x4d3def VFMADD213SD (%RBX,%RCX,8),%XMM0,%XMM1 |
(3938) 0x4d3df5 VMOVSD %XMM1,(%RBX,%RCX,8) |
(3938) 0x4d3dfa MOV -0x8(%R13,%RAX,1),%RCX |
(3938) 0x4d3dff VMOVSD (%R15,%RDX,8),%XMM0 |
(3938) 0x4d3e05 VMOVSD -0x8(%R12,%RAX,1),%XMM1 |
(3938) 0x4d3e0c VFMADD213SD (%RBX,%RCX,8),%XMM0,%XMM1 |
(3938) 0x4d3e12 VMOVSD %XMM1,(%RBX,%RCX,8) |
(3938) 0x4d3e17 MOV (%R13,%RAX,1),%RCX |
(3938) 0x4d3e1c VMOVSD (%R15,%RDX,8),%XMM0 |
(3938) 0x4d3e22 VMOVSD (%R12,%RAX,1),%XMM1 |
(3938) 0x4d3e28 VFMADD213SD (%RBX,%RCX,8),%XMM0,%XMM1 |
(3938) 0x4d3e2e VMOVSD %XMM1,(%RBX,%RCX,8) |
(3938) 0x4d3e33 ADD $0x20,%RAX |
(3938) 0x4d3e37 DEC %RDI |
(3938) 0x4d3e3a JNE 4d3dc0 |
(3936) 0x4d3e3c MOV %R9,%RAX |
(3936) 0x4d3e3f AND $-0x4,%RAX |
(3936) 0x4d3e43 CMP %R9,%RAX |
(3936) 0x4d3e46 JAE 4d3d80 |
(3936) 0x4d3e4c ADD %RAX,%RSI |
(3936) 0x4d3e4f NOP |
(3937) 0x4d3e50 MOV (%R13,%RSI,8),%RAX |
(3937) 0x4d3e55 VMOVSD (%R15,%RDX,8),%XMM0 |
(3937) 0x4d3e5b VMOVSD (%R12,%RSI,8),%XMM1 |
(3937) 0x4d3e61 VFMADD213SD (%RBX,%RAX,8),%XMM0,%XMM1 |
(3937) 0x4d3e67 VMOVSD %XMM1,(%RBX,%RAX,8) |
(3937) 0x4d3e6c INC %RSI |
(3937) 0x4d3e6f CMP %RSI,%R10 |
(3937) 0x4d3e72 JNE 4d3e50 |
(3936) 0x4d3e74 JMP 4d3d80 |
0x4d3e79 CMPQ $0,-0x40(%RBP) |
0x4d3e7e JLE 4d3d44 |
0x4d3e84 CMPQ $0,-0x38(%RBP) |
0x4d3e89 JLE 4d3d44 |
0x4d3e8f MOV -0x40(%RBP),%RAX |
0x4d3e93 DEC %RAX |
0x4d3e96 MOV %RAX,-0xd8(%RBP) |
0x4d3e9d MOV -0x48(%RBP),%RAX |
0x4d3ea1 MOV (%RAX),%R11 |
0x4d3ea4 MOV -0x38(%RBP),%RAX |
0x4d3ea8 LEA -0x1(%RAX),%R8 |
0x4d3eac XOR %ECX,%ECX |
0x4d3eae JMP 4d3ece |
(3939) 0x4d3eb0 MOV %RDX,%R11 |
(3939) 0x4d3eb3 MOV -0x98(%RBP),%RAX |
(3939) 0x4d3eba CMP -0xd8(%RBP),%RAX |
(3939) 0x4d3ec1 MOV -0xe0(%RBP),%RCX |
(3939) 0x4d3ec8 JE 4d3d44 |
(3939) 0x4d3ece LEA 0x1(%RCX),%RAX |
(3939) 0x4d3ed2 MOV %RAX,-0xe0(%RBP) |
(3939) 0x4d3ed9 MOV -0x48(%RBP),%RAX |
(3939) 0x4d3edd MOV 0x8(%RAX,%RCX,8),%RDX |
(3939) 0x4d3ee2 MOV %RDX,%RAX |
(3939) 0x4d3ee5 SUB %R11,%RAX |
(3939) 0x4d3ee8 MOV %RAX,%RSI |
(3939) 0x4d3eeb SHR $0x2,%RSI |
(3939) 0x4d3eef MOV %RSI,-0xa8(%RBP) |
(3939) 0x4d3ef6 MOV -0xd0(%RBP),%RSI |
(3939) 0x4d3efd MOV %RCX,-0x98(%RBP) |
(3939) 0x4d3f04 IMUL %RCX,%RSI |
(3939) 0x4d3f08 MOV %RSI,-0x50(%RBP) |
(3939) 0x4d3f0c MOV %RAX,-0x30(%RBP) |
(3939) 0x4d3f10 AND $-0x4,%RAX |
(3939) 0x4d3f14 LEA 0x18(,%R11,8),%RCX |
(3939) 0x4d3f1c MOV %RCX,-0xa0(%RBP) |
(3939) 0x4d3f23 MOV %RAX,-0xb0(%RBP) |
(3939) 0x4d3f2a ADD %R11,%RAX |
(3939) 0x4d3f2d MOV %RAX,-0x68(%RBP) |
(3939) 0x4d3f31 XOR %R9D,%R9D |
(3939) 0x4d3f34 JMP 4d3f50 |
0x4d3f36 NOPW %CS:(%RAX,%RAX,1) |
(3940) 0x4d3f40 LEA 0x1(%R9),%RAX |
(3940) 0x4d3f44 CMP %R8,%R9 |
(3940) 0x4d3f47 MOV %RAX,%R9 |
(3940) 0x4d3f4a JE 4d3eb0 |
(3940) 0x4d3f50 CMP %R11,%RDX |
(3940) 0x4d3f53 JLE 4d3f40 |
(3940) 0x4d3f55 MOV %R11,%RDI |
(3940) 0x4d3f58 MOV %R8,%RCX |
(3940) 0x4d3f5b CMPQ $0x4,-0x30(%RBP) |
(3940) 0x4d3f60 JB 4d402c |
(3940) 0x4d3f66 MOV -0x78(%RBP),%R10 |
(3940) 0x4d3f6a IMUL %R9,%R10 |
(3940) 0x4d3f6e ADD -0x50(%RBP),%R10 |
(3940) 0x4d3f72 MOV -0x80(%RBP),%R11 |
(3940) 0x4d3f76 IMUL %R9,%R11 |
(3940) 0x4d3f7a MOV -0xa8(%RBP),%RSI |
(3940) 0x4d3f81 MOV -0xa0(%RBP),%R8 |
(3940) 0x4d3f88 NOPL (%RAX,%RAX,1) |
(3942) 0x4d3f90 VMOVSD (%R15,%R10,8),%XMM0 |
(3942) 0x4d3f96 VMOVSD -0x18(%R12,%R8,1),%XMM1 |
(3942) 0x4d3f9d MOV -0x18(%R13,%R8,1),%RAX |
(3942) 0x4d3fa2 IMUL %R14,%RAX |
(3942) 0x4d3fa6 ADD %R11,%RAX |
(3942) 0x4d3fa9 VFMADD213SD (%RBX,%RAX,8),%XMM0,%XMM1 |
(3942) 0x4d3faf VMOVSD %XMM1,(%RBX,%RAX,8) |
(3942) 0x4d3fb4 VMOVSD (%R15,%R10,8),%XMM0 |
(3942) 0x4d3fba VMOVSD -0x10(%R12,%R8,1),%XMM1 |
(3942) 0x4d3fc1 MOV -0x10(%R13,%R8,1),%RAX |
(3942) 0x4d3fc6 IMUL %R14,%RAX |
(3942) 0x4d3fca ADD %R11,%RAX |
(3942) 0x4d3fcd VFMADD213SD (%RBX,%RAX,8),%XMM0,%XMM1 |
(3942) 0x4d3fd3 VMOVSD %XMM1,(%RBX,%RAX,8) |
(3942) 0x4d3fd8 VMOVSD (%R15,%R10,8),%XMM0 |
(3942) 0x4d3fde VMOVSD -0x8(%R12,%R8,1),%XMM1 |
(3942) 0x4d3fe5 MOV -0x8(%R13,%R8,1),%RAX |
(3942) 0x4d3fea IMUL %R14,%RAX |
(3942) 0x4d3fee ADD %R11,%RAX |
(3942) 0x4d3ff1 VFMADD213SD (%RBX,%RAX,8),%XMM0,%XMM1 |
(3942) 0x4d3ff7 VMOVSD %XMM1,(%RBX,%RAX,8) |
(3942) 0x4d3ffc VMOVSD (%R15,%R10,8),%XMM0 |
(3942) 0x4d4002 VMOVSD (%R12,%R8,1),%XMM1 |
(3942) 0x4d4008 MOV (%R13,%R8,1),%RAX |
(3942) 0x4d400d IMUL %R14,%RAX |
(3942) 0x4d4011 ADD %R11,%RAX |
(3942) 0x4d4014 VFMADD213SD (%RBX,%RAX,8),%XMM0,%XMM1 |
(3942) 0x4d401a VMOVSD %XMM1,(%RBX,%RAX,8) |
(3942) 0x4d401f ADD $0x20,%R8 |
(3942) 0x4d4023 DEC %RSI |
(3942) 0x4d4026 JNE 4d3f90 |
(3940) 0x4d402c MOV -0xb0(%RBP),%RAX |
(3940) 0x4d4033 CMP -0x30(%RBP),%RAX |
(3940) 0x4d4037 MOV -0x70(%RBP),%R10 |
(3940) 0x4d403b MOV %RCX,%R8 |
(3940) 0x4d403e MOV %RDI,%R11 |
(3940) 0x4d4041 JAE 4d3f40 |
(3940) 0x4d4047 MOV -0x78(%RBP),%RSI |
(3940) 0x4d404b IMUL %R9,%RSI |
(3940) 0x4d404f ADD -0x50(%RBP),%RSI |
(3940) 0x4d4053 MOV -0x80(%RBP),%RCX |
(3940) 0x4d4057 IMUL %R9,%RCX |
(3940) 0x4d405b MOV -0x68(%RBP),%RAX |
(3940) 0x4d405f NOP |
(3941) 0x4d4060 VMOVSD (%R15,%RSI,8),%XMM0 |
(3941) 0x4d4066 VMOVSD (%R12,%RAX,8),%XMM1 |
(3941) 0x4d406c MOV (%R13,%RAX,8),%RDI |
(3941) 0x4d4071 IMUL %R14,%RDI |
(3941) 0x4d4075 ADD %RCX,%RDI |
(3941) 0x4d4078 VFMADD213SD (%RBX,%RDI,8),%XMM0,%XMM1 |
(3941) 0x4d407e VMOVSD %XMM1,(%RBX,%RDI,8) |
(3941) 0x4d4083 INC %RAX |
(3941) 0x4d4086 CMP %RAX,%RDX |
(3941) 0x4d4089 JNE 4d4060 |
(3940) 0x4d408b JMP 4d3f40 |
0x4d4090 JL 4d4290 |
0x4d4096 MOV -0x40(%RBP),%RAX |
0x4d409a DEC %RAX |
0x4d409d MOV %RAX,-0x98(%RBP) |
0x4d40a4 MOV -0x48(%RBP),%RAX |
0x4d40a8 MOV (%RAX),%RDX |
0x4d40ab MOV -0x38(%RBP),%RAX |
0x4d40af DEC %RAX |
0x4d40b2 MOV %RAX,-0xb0(%RBP) |
0x4d40b9 XOR %ECX,%ECX |
0x4d40bb JMP 4d40db |
0x4d40bd NOPL (%RAX) |
(3932) 0x4d40c0 MOV -0x68(%RBP),%RCX |
(3932) 0x4d40c4 LEA 0x1(%RCX),%RAX |
(3932) 0x4d40c8 MOV %RDI,%RDX |
(3932) 0x4d40cb CMP -0x98(%RBP),%RCX |
(3932) 0x4d40d2 MOV %RAX,%RCX |
(3932) 0x4d40d5 JE 4d4290 |
(3932) 0x4d40db MOV -0x48(%RBP),%RAX |
(3932) 0x4d40df MOV %RCX,-0x68(%RBP) |
(3932) 0x4d40e3 MOV 0x8(%RAX,%RCX,8),%RDI |
(3932) 0x4d40e8 MOV %RDI,%RSI |
(3932) 0x4d40eb SUB %RDX,%RSI |
(3932) 0x4d40ee JLE 4d40c0 |
(3932) 0x4d40f0 MOV %RDX,%RAX |
(3932) 0x4d40f3 MOV %RSI,%RCX |
(3932) 0x4d40f6 SHR $0x2,%RCX |
(3932) 0x4d40fa MOV %RCX,-0xa8(%RBP) |
(3932) 0x4d4101 MOV -0xd0(%RBP),%RCX |
(3932) 0x4d4108 IMUL -0x68(%RBP),%RCX |
(3932) 0x4d410d MOV %RCX,-0x50(%RBP) |
(3932) 0x4d4111 MOV %RSI,%RCX |
(3932) 0x4d4114 AND $-0x4,%RCX |
(3932) 0x4d4118 LEA 0x18(,%RDX,8),%RDX |
(3932) 0x4d4120 MOV %RDX,-0xa0(%RBP) |
(3932) 0x4d4127 MOV %RCX,-0x30(%RBP) |
(3932) 0x4d412b ADD %RCX,%RAX |
(3932) 0x4d412e MOV %RAX,-0x70(%RBP) |
(3932) 0x4d4132 XOR %R9D,%R9D |
(3932) 0x4d4135 JMP 4d4157 |
0x4d4137 NOPW (%RAX,%RAX,1) |
(3933) 0x4d4140 LEA 0x1(%R9),%RAX |
(3933) 0x4d4144 CMP -0xb0(%RBP),%R9 |
(3933) 0x4d414b MOV %RAX,%R9 |
(3933) 0x4d414e MOV %R8,%RSI |
(3933) 0x4d4151 JE 4d40c0 |
(3933) 0x4d4157 CMP $0x4,%RSI |
(3933) 0x4d415b JB 4d422c |
(3933) 0x4d4161 MOV -0x78(%RBP),%R8 |
(3933) 0x4d4165 IMUL %R9,%R8 |
(3933) 0x4d4169 ADD -0x50(%RBP),%R8 |
(3933) 0x4d416d MOV -0x80(%RBP),%R10 |
(3933) 0x4d4171 IMUL %R9,%R10 |
(3933) 0x4d4175 MOV -0xa8(%RBP),%R11 |
(3933) 0x4d417c MOV -0xa0(%RBP),%RCX |
(3933) 0x4d4183 NOPW %CS:(%RAX,%RAX,1) |
(3935) 0x4d4190 VMOVSD (%R15,%R8,8),%XMM0 |
(3935) 0x4d4196 VMOVSD -0x18(%R12,%RCX,1),%XMM1 |
(3935) 0x4d419d MOV -0x18(%R13,%RCX,1),%RDX |
(3935) 0x4d41a2 IMUL %R14,%RDX |
(3935) 0x4d41a6 ADD %R10,%RDX |
(3935) 0x4d41a9 VFMADD213SD (%RBX,%RDX,8),%XMM0,%XMM1 |
(3935) 0x4d41af VMOVSD %XMM1,(%RBX,%RDX,8) |
(3935) 0x4d41b4 VMOVSD (%R15,%R8,8),%XMM0 |
(3935) 0x4d41ba VMOVSD -0x10(%R12,%RCX,1),%XMM1 |
(3935) 0x4d41c1 MOV -0x10(%R13,%RCX,1),%RDX |
(3935) 0x4d41c6 IMUL %R14,%RDX |
(3935) 0x4d41ca ADD %R10,%RDX |
(3935) 0x4d41cd VFMADD213SD (%RBX,%RDX,8),%XMM0,%XMM1 |
(3935) 0x4d41d3 VMOVSD %XMM1,(%RBX,%RDX,8) |
(3935) 0x4d41d8 VMOVSD (%R15,%R8,8),%XMM0 |
(3935) 0x4d41de VMOVSD -0x8(%R12,%RCX,1),%XMM1 |
(3935) 0x4d41e5 MOV -0x8(%R13,%RCX,1),%RDX |
(3935) 0x4d41ea IMUL %R14,%RDX |
(3935) 0x4d41ee ADD %R10,%RDX |
(3935) 0x4d41f1 VFMADD213SD (%RBX,%RDX,8),%XMM0,%XMM1 |
(3935) 0x4d41f7 VMOVSD %XMM1,(%RBX,%RDX,8) |
(3935) 0x4d41fc VMOVSD (%R15,%R8,8),%XMM0 |
(3935) 0x4d4202 VMOVSD (%R12,%RCX,1),%XMM1 |
(3935) 0x4d4208 MOV (%R13,%RCX,1),%RDX |
(3935) 0x4d420d IMUL %R14,%RDX |
(3935) 0x4d4211 ADD %R10,%RDX |
(3935) 0x4d4214 VFMADD213SD (%RBX,%RDX,8),%XMM0,%XMM1 |
(3935) 0x4d421a VMOVSD %XMM1,(%RBX,%RDX,8) |
(3935) 0x4d421f ADD $0x20,%RCX |
(3935) 0x4d4223 DEC %R11 |
(3935) 0x4d4226 JNE 4d4190 |
(3933) 0x4d422c MOV %RSI,%R8 |
(3933) 0x4d422f CMP %RSI,-0x30(%RBP) |
(3933) 0x4d4233 JAE 4d4140 |
(3933) 0x4d4239 MOV -0x78(%RBP),%RCX |
(3933) 0x4d423d IMUL %R9,%RCX |
(3933) 0x4d4241 ADD -0x50(%RBP),%RCX |
(3933) 0x4d4245 MOV -0x80(%RBP),%RDX |
(3933) 0x4d4249 IMUL %R9,%RDX |
(3933) 0x4d424d MOV -0x70(%RBP),%RSI |
(3933) 0x4d4251 NOPW %CS:(%RAX,%RAX,1) |
(3934) 0x4d4260 VMOVSD (%R15,%RCX,8),%XMM0 |
(3934) 0x4d4266 VMOVSD (%R12,%RSI,8),%XMM1 |
(3934) 0x4d426c MOV (%R13,%RSI,8),%RAX |
(3934) 0x4d4271 IMUL %R14,%RAX |
(3934) 0x4d4275 ADD %RDX,%RAX |
(3934) 0x4d4278 VFMADD213SD (%RBX,%RAX,8),%XMM0,%XMM1 |
(3934) 0x4d427e VMOVSD %XMM1,(%RBX,%RAX,8) |
(3934) 0x4d4283 INC %RSI |
(3934) 0x4d4286 CMP %RSI,%RDI |
(3934) 0x4d4289 JNE 4d4260 |
(3933) 0x4d428b JMP 4d4140 |
0x4d4290 VMOVSD -0x58(%RBP),%XMM0 |
0x4d4295 VUCOMISD 0x20d6b(%RIP),%XMM0 |
0x4d429d MOV -0x90(%RBP),%R15 |
0x4d42a4 MOV -0x38(%RBP),%R10 |
0x4d42a8 JE 4d42e1 |
0x4d42aa IMUL %R15,%R10 |
0x4d42ae TEST %R10,%R10 |
0x4d42b1 JLE 4d42e1 |
0x4d42b3 DEC %R10 |
0x4d42b6 VMOVQ %XMM0,%RCX |
0x4d42bb SUB $0x8,%RSP |
0x4d42bf MOV $0x52bf30,%EDI |
0x4d42c4 MOV $0x4d5c30,%EDX |
0x4d42c9 MOV $0x4,%ESI |
0x4d42ce MOV %RBX,%R8 |
0x4d42d1 XOR %R9D,%R9D |
0x4d42d4 XOR %EAX,%EAX |
0x4d42d6 PUSH %R10 |
0x4d42d8 CALL 410390 <__kmpc_fork_call@plt> |
0x4d42dd ADD $0x10,%RSP |
0x4d42e1 MOV -0x88(%RBP),%RAX |
0x4d42e8 CMP -0xc0(%RBP),%RAX |
0x4d42ef MOV -0xc8(%RBP),%R14 |
0x4d42f6 MOV -0x60(%RBP),%R12 |
0x4d42fa JNE 4d4308 |
0x4d42fc MOV -0xb8(%RBP),%RDI |
0x4d4303 CALL 4d6db0 <hypre_SeqVectorDestroy> |
0x4d4308 XOR %EAX,%EAX |
0x4d430a MOV -0x40(%RBP),%RDX |
0x4d430e CMP %R14,%RDX |
0x4d4311 SETNE %AL |
0x4d4314 CMP %R12,%R15 |
0x4d4317 MOV $0x2,%ECX |
0x4d431c CMOVE %RAX,%RCX |
0x4d4320 MOV $0x3,%EAX |
0x4d4325 CMOVE %RCX,%RAX |
0x4d4329 CMP %R14,%RDX |
0x4d432c CMOVE %RCX,%RAX |
0x4d4330 ADD $0xb8,%RSP |
0x4d4337 POP %RBX |
0x4d4338 POP %R12 |
0x4d433a POP %R13 |
0x4d433c POP %R14 |
0x4d433e POP %R15 |
0x4d4340 POP %RBP |
0x4d4341 RET |
0x4d4342 NOPW %CS:(%RAX,%RAX,1) |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►95.27+ | hypre_ParCSRMatrixMatvecT | par_csr_matvec.c:432 | exec |
○ | hypre_BoomerAMGCycle | par_cycle.c:435 | exec |
○ | hypre_BoomerAMGSolve | par_amg_solve.c:272 | exec |
○ | hypre_PCGSolve | pcg.c:545 | exec |
○ | main | amg.c:419 | exec |
○ | __libc_init_first | libc.so.6 | |
►4.73+ | hypre_ParCSRMatrixMatvecT | par_csr_matvec.c:432 | exec |
○ | hypre_BoomerAMGCycle | par_cycle.c:435 | exec |
○ | hypre_BoomerAMGSolve | par_amg_solve.c:272 | exec |
○ | hypre_PCGSolve | pcg.c:424 | exec |
○ | main | amg.c:419 | exec |
○ | __libc_init_first | libc.so.6 |
Path / |
Source file and lines | csr_matvec.c:445-648 |
Module | exec |
nb instructions | 242 |
nb uops | 260 |
loop length | 1002 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 3 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 18 |
micro-operation queue | 65.00 cycles |
front end | 65.00 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | |
---|---|---|---|---|---|---|---|---|
uops | 28.50 | 28.50 | 40.17 | 39.83 | 50.00 | 28.50 | 28.50 | 40.00 |
cycles | 28.50 | 28.50 | 40.17 | 39.83 | 50.00 | 28.50 | 28.50 | 40.00 |
Cycles executing div or sqrt instructions | 8.00 |
FE+BE cycles | 62.25-62.26 |
Stall cycles | 0.00 |
Front-end | 65.00 |
Dispatch | 50.00 |
DIV/SQRT | 8.00 |
Overall L1 | 65.00 |
all | 0% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 11% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 28% |
all | 2% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 5% |
all | 12% |
load | 12% |
store | 12% |
mul | 12% |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 11% |
all | 13% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 12% |
other | 16% |
all | 12% |
load | 12% |
store | 12% |
mul | 12% |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 12% |
other | 12% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
PUSH %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
SUB $0xb8,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %RSI,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV 0x30(%RDI),%R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV (%RDI),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x48(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x8(%RDI),%R13 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x10(%RDI),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x40(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x18(%RDI),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x90(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV (%RSI),%R15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x8(%RSI),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0xc8(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV (%RDX),%RBX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x8(%RDX),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x60(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x18(%RSI),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x30(%RDX),%R14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x28(%RDX),%RSI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RSI,-0x80(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x30(%RCX),%RSI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RSI,-0xd0(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x28(%RCX),%RSI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RSI,-0x78(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RAX,-0x38(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RDX,-0xc0(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
CMP 0x18(%RDX),%RAX | 1 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
JE 4d3ba6 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV 0x59063(%RIP),%RDI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV $0x505dec,%ESI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV $0x5084a8,%EDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %RCX,-0x88(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVSD %XMM0,-0x58(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVSD %XMM1,-0x30(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
CALL 4ddb80 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
MOV $0x5084d1,%EDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV $0x1e5,%ESI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV $0x1,%EDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CALL 4dfac0 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
VMOVSD -0x30(%RBP),%XMM1 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD -0x58(%RBP),%XMM0 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV -0x88(%RBP),%RCX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VXORPD %XMM2,%XMM2,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VUCOMISD %XMM2,%XMM0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
JNE 4d3c06 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV -0x90(%RBP),%R15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV -0x38(%RBP),%R10 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
IMUL %R15,%R10 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
TEST %R10,%R10 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV -0xc8(%RBP),%R14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV -0x60(%RBP),%R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
JLE 4d4308 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
DEC %R10 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVQ %XMM1,%RCX | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
SUB $0x8,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV $0x52c130,%EDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV $0x4d6350,%EDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV $0x4,%ESI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %RBX,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XOR %R9D,%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
PUSH %R10 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
CALL 410390 <__kmpc_fork_call@plt> | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
ADD $0x10,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JMP 4d4308 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
CMP -0xc0(%RBP),%RCX | 1 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
MOV %RCX,-0x88(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVSD %XMM0,-0x58(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
JE 4d3c37 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %RAX,-0xb8(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VDIVSD %XMM0,%XMM1,%XMM0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-14 | 4 |
VUCOMISD 0x213d8(%RIP),%XMM0 | 2 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 |
JNE 4d3c69 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
JMP 4d3cdb | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
MOV %RCX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVSD %XMM1,-0x30(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
CALL 4d75f0 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
VMOVSD -0x30(%RBP),%XMM1 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD -0x58(%RBP),%XMM0 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %RAX,-0xb8(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV (%RAX),%R15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VDIVSD %XMM0,%XMM1,%XMM0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-14 | 4 |
VUCOMISD 0x213a1(%RIP),%XMM0 | 2 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 |
JE 4d3cdb | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV -0x38(%RBP),%R10 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
IMUL -0x90(%RBP),%R10 | 1 | 0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 |
VXORPD %XMM1,%XMM1,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VUCOMISD %XMM1,%XMM0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
JNE 4d3ca8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
TEST %R10,%R10 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JLE 4d3cdb | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
DEC %R10 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV $0x52c0d0,%EDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV $0x4d62c0,%EDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV $0x3,%ESI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %RBX,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XOR %R8D,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %R10,%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CALL 410390 <__kmpc_fork_call@plt> | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
JMP 4d3cdb | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
TEST %R10,%R10 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JLE 4d3cdb | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
DEC %R10 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVQ %XMM0,%R8 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
SUB $0x8,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV $0x52c070,%EDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV $0x4d61d0,%EDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV $0x4,%ESI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %RBX,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XOR %R9D,%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
PUSH %R10 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
CALL 410390 <__kmpc_fork_call@plt> | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
ADD $0x10,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CALL 4df9b0 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
CMP $0x1,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JLE 4d3d51 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV %RAX,-0x30(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RAX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
IMUL -0x60(%RBP),%RDI | 1 | 0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV $0x8,%ESI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CALL 4dd8f0 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
MOV %RAX,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CMPQ $0x1,-0x38(%RBP) | 1 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
MOV %RAX,-0x70(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
JNE 4d3e79 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV $0x52c010,%EDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV $0x4d5d20,%EDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV $0x9,%ESI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %R12,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV -0x48(%RBP),%R8 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %R13,%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
PUSHQ -0x30(%RBP) | 2 | 0 | 0 | 0.83 | 0.83 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R10 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSHQ -0x60(%RBP) | 2 | 0 | 0 | 0.83 | 0.83 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSHQ -0x40(%RBP) | 2 | 0 | 0 | 0.83 | 0.83 | 1 | 0 | 0 | 0.33 | 3 | 1 |
CALL 410390 <__kmpc_fork_call@plt> | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
MOV -0x70(%RBP),%R10 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
ADD $0x30,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %R10,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CALL 4dd9d0 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
JMP 4d4290 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
CMPQ $0,-0x40(%RBP) | 1 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
JLE 4d4290 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMPQ $0x1,-0x38(%RBP) | 1 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
JNE 4d4090 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV -0x40(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
LEA -0x1(%RAX),%R8 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV -0x48(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV (%RAX),%R10 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 4d3d90 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CMPQ $0,-0x40(%RBP) | 1 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
JLE 4d3d44 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMPQ $0,-0x38(%RBP) | 1 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
JLE 4d3d44 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV -0x40(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
DEC %RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %RAX,-0xd8(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV -0x48(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV (%RAX),%R11 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV -0x38(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
LEA -0x1(%RAX),%R8 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 4d3ece | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JL 4d4290 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV -0x40(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
DEC %RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %RAX,-0x98(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV -0x48(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV (%RAX),%RDX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV -0x38(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
DEC %RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %RAX,-0xb0(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 4d40db | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVSD -0x58(%RBP),%XMM0 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VUCOMISD 0x20d6b(%RIP),%XMM0 | 2 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV -0x90(%RBP),%R15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV -0x38(%RBP),%R10 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
JE 4d42e1 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
IMUL %R15,%R10 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
TEST %R10,%R10 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JLE 4d42e1 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
DEC %R10 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVQ %XMM0,%RCX | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
SUB $0x8,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV $0x52bf30,%EDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV $0x4d5c30,%EDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV $0x4,%ESI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %RBX,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XOR %R9D,%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
PUSH %R10 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
CALL 410390 <__kmpc_fork_call@plt> | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
ADD $0x10,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV -0x88(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
CMP -0xc0(%RBP),%RAX | 1 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
MOV -0xc8(%RBP),%R14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV -0x60(%RBP),%R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
JNE 4d4308 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV -0xb8(%RBP),%RDI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
CALL 4d6db0 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV -0x40(%RBP),%RDX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
CMP %R14,%RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
SETNE %AL | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
CMP %R12,%R15 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV $0x2,%ECX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CMOVE %RAX,%RCX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
MOV $0x3,%EAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CMOVE %RCX,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
CMP %R14,%RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CMOVE %RCX,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
ADD $0xb8,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R13 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %RBP | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
RET | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | 0 | 1 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
Source file and lines | csr_matvec.c:445-648 |
Module | exec |
nb instructions | 242 |
nb uops | 260 |
loop length | 1002 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 3 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 18 |
micro-operation queue | 65.00 cycles |
front end | 65.00 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | |
---|---|---|---|---|---|---|---|---|
uops | 28.50 | 28.50 | 40.17 | 39.83 | 50.00 | 28.50 | 28.50 | 40.00 |
cycles | 28.50 | 28.50 | 40.17 | 39.83 | 50.00 | 28.50 | 28.50 | 40.00 |
Cycles executing div or sqrt instructions | 8.00 |
FE+BE cycles | 62.25-62.26 |
Stall cycles | 0.00 |
Front-end | 65.00 |
Dispatch | 50.00 |
DIV/SQRT | 8.00 |
Overall L1 | 65.00 |
all | 0% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 11% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 28% |
all | 2% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 5% |
all | 12% |
load | 12% |
store | 12% |
mul | 12% |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 11% |
all | 13% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 12% |
other | 16% |
all | 12% |
load | 12% |
store | 12% |
mul | 12% |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 12% |
other | 12% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
PUSH %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
SUB $0xb8,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %RSI,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV 0x30(%RDI),%R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV (%RDI),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x48(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x8(%RDI),%R13 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x10(%RDI),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x40(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x18(%RDI),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x90(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV (%RSI),%R15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x8(%RSI),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0xc8(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV (%RDX),%RBX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x8(%RDX),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,-0x60(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x18(%RSI),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x30(%RDX),%R14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x28(%RDX),%RSI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RSI,-0x80(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x30(%RCX),%RSI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RSI,-0xd0(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x28(%RCX),%RSI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RSI,-0x78(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RAX,-0x38(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RDX,-0xc0(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
CMP 0x18(%RDX),%RAX | 1 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
JE 4d3ba6 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV 0x59063(%RIP),%RDI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV $0x505dec,%ESI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV $0x5084a8,%EDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %RCX,-0x88(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVSD %XMM0,-0x58(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVSD %XMM1,-0x30(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
CALL 4ddb80 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
MOV $0x5084d1,%EDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV $0x1e5,%ESI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV $0x1,%EDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CALL 4dfac0 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
VMOVSD -0x30(%RBP),%XMM1 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD -0x58(%RBP),%XMM0 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV -0x88(%RBP),%RCX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VXORPD %XMM2,%XMM2,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VUCOMISD %XMM2,%XMM0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
JNE 4d3c06 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV -0x90(%RBP),%R15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV -0x38(%RBP),%R10 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
IMUL %R15,%R10 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
TEST %R10,%R10 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV -0xc8(%RBP),%R14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV -0x60(%RBP),%R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
JLE 4d4308 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
DEC %R10 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVQ %XMM1,%RCX | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
SUB $0x8,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV $0x52c130,%EDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV $0x4d6350,%EDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV $0x4,%ESI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %RBX,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XOR %R9D,%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
PUSH %R10 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
CALL 410390 <__kmpc_fork_call@plt> | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
ADD $0x10,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JMP 4d4308 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
CMP -0xc0(%RBP),%RCX | 1 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
MOV %RCX,-0x88(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VMOVSD %XMM0,-0x58(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
JE 4d3c37 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %RAX,-0xb8(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VDIVSD %XMM0,%XMM1,%XMM0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-14 | 4 |
VUCOMISD 0x213d8(%RIP),%XMM0 | 2 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 |
JNE 4d3c69 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
JMP 4d3cdb | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
MOV %RCX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVSD %XMM1,-0x30(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
CALL 4d75f0 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
VMOVSD -0x30(%RBP),%XMM1 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD -0x58(%RBP),%XMM0 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %RAX,-0xb8(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV (%RAX),%R15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VDIVSD %XMM0,%XMM1,%XMM0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-14 | 4 |
VUCOMISD 0x213a1(%RIP),%XMM0 | 2 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 |
JE 4d3cdb | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV -0x38(%RBP),%R10 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
IMUL -0x90(%RBP),%R10 | 1 | 0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 |
VXORPD %XMM1,%XMM1,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VUCOMISD %XMM1,%XMM0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
JNE 4d3ca8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
TEST %R10,%R10 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JLE 4d3cdb | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
DEC %R10 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV $0x52c0d0,%EDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV $0x4d62c0,%EDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV $0x3,%ESI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %RBX,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XOR %R8D,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %R10,%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CALL 410390 <__kmpc_fork_call@plt> | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
JMP 4d3cdb | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
TEST %R10,%R10 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JLE 4d3cdb | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
DEC %R10 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVQ %XMM0,%R8 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
SUB $0x8,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV $0x52c070,%EDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV $0x4d61d0,%EDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV $0x4,%ESI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %RBX,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XOR %R9D,%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
PUSH %R10 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
CALL 410390 <__kmpc_fork_call@plt> | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
ADD $0x10,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CALL 4df9b0 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
CMP $0x1,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JLE 4d3d51 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV %RAX,-0x30(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RAX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
IMUL -0x60(%RBP),%RDI | 1 | 0 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV $0x8,%ESI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CALL 4dd8f0 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
MOV %RAX,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CMPQ $0x1,-0x38(%RBP) | 1 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
MOV %RAX,-0x70(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
JNE 4d3e79 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV $0x52c010,%EDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV $0x4d5d20,%EDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV $0x9,%ESI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %R12,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV -0x48(%RBP),%R8 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %R13,%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
PUSHQ -0x30(%RBP) | 2 | 0 | 0 | 0.83 | 0.83 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R10 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSHQ -0x60(%RBP) | 2 | 0 | 0 | 0.83 | 0.83 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSHQ -0x40(%RBP) | 2 | 0 | 0 | 0.83 | 0.83 | 1 | 0 | 0 | 0.33 | 3 | 1 |
CALL 410390 <__kmpc_fork_call@plt> | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
MOV -0x70(%RBP),%R10 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
ADD $0x30,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %R10,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CALL 4dd9d0 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
JMP 4d4290 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
CMPQ $0,-0x40(%RBP) | 1 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
JLE 4d4290 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMPQ $0x1,-0x38(%RBP) | 1 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
JNE 4d4090 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV -0x40(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
LEA -0x1(%RAX),%R8 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV -0x48(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV (%RAX),%R10 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 4d3d90 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CMPQ $0,-0x40(%RBP) | 1 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
JLE 4d3d44 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMPQ $0,-0x38(%RBP) | 1 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
JLE 4d3d44 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV -0x40(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
DEC %RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %RAX,-0xd8(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV -0x48(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV (%RAX),%R11 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV -0x38(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
LEA -0x1(%RAX),%R8 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 4d3ece | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JL 4d4290 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV -0x40(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
DEC %RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %RAX,-0x98(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV -0x48(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV (%RAX),%RDX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV -0x38(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
DEC %RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %RAX,-0xb0(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 4d40db | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VMOVSD -0x58(%RBP),%XMM0 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VUCOMISD 0x20d6b(%RIP),%XMM0 | 2 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV -0x90(%RBP),%R15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV -0x38(%RBP),%R10 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
JE 4d42e1 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
IMUL %R15,%R10 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
TEST %R10,%R10 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JLE 4d42e1 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
DEC %R10 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVQ %XMM0,%RCX | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
SUB $0x8,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV $0x52bf30,%EDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV $0x4d5c30,%EDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV $0x4,%ESI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %RBX,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XOR %R9D,%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
PUSH %R10 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
CALL 410390 <__kmpc_fork_call@plt> | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
ADD $0x10,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV -0x88(%RBP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
CMP -0xc0(%RBP),%RAX | 1 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
MOV -0xc8(%RBP),%R14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV -0x60(%RBP),%R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
JNE 4d4308 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV -0xb8(%RBP),%RDI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
CALL 4d6db0 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV -0x40(%RBP),%RDX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
CMP %R14,%RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
SETNE %AL | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
CMP %R12,%R15 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV $0x2,%ECX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CMOVE %RAX,%RCX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
MOV $0x3,%EAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CMOVE %RCX,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
CMP %R14,%RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CMOVE %RCX,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
ADD $0xb8,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R13 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %RBP | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
RET | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | 0 | 1 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼hypre_CSRMatrixMatvecT– | 2.02 | 0.74 |
▼Loop 3936 - csr_matvec.c:485-615 - exec– | 0.59 | 0.21 |
○Loop 3937 - csr_matvec.c:612-615 - exec | 1.26 | 0.46 |
○Loop 3938 - csr_matvec.c:612-615 - exec | 0.18 | 0.06 |
▼Loop 3932 - csr_matvec.c:485-626 - exec– | 0 | 0 |
▼Loop 3933 - csr_matvec.c:620-626 - exec– | 0 | 0 |
○Loop 3935 - csr_matvec.c:622-626 - exec | 0 | 0 |
○Loop 3934 - csr_matvec.c:622-626 - exec | 0 | 0 |
▼Loop 3939 - csr_matvec.c:485-597 - exec– | 0 | 0 |
▼Loop 3940 - csr_matvec.c:591-597 - exec– | 0 | 0 |
○Loop 3941 - csr_matvec.c:593-597 - exec | 0 | 0 |
○Loop 3942 - csr_matvec.c:593-597 - exec | 0 | 0 |