Function: hypre_ParCSRRelaxThreads._omp_fn.1 | Module: libparcsr_ls.so | Source: ams.c:3662-3682 [...] | Coverage: 48.50% |
---|
Function: hypre_ParCSRRelaxThreads._omp_fn.1 | Module: libparcsr_ls.so | Source: ams.c:3662-3682 [...] | Coverage: 48.50% |
---|
/home/eoseret/qaas_runs_CPU_9468/172-019-1763/intel/AMG/build/AMG/AMG/parcsr_ls/ams.c: 3662 - 3682 |
-------------------------------------------------------------------------------- |
3662: #pragma omp parallel for private(i,ii,jj,res) HYPRE_SMP_SCHEDULE |
[...] |
3669: if (A_diag_data[A_diag_i[i]] != zero) |
3670: { |
3671: res = f_data[i]; |
3672: for (jj = A_diag_i[i]; jj < A_diag_i[i+1]; jj++) |
3673: { |
3674: ii = A_diag_j[jj]; |
3675: res -= A_diag_data[jj] * Vtemp_data[ii]; |
3676: } |
3677: for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++) |
3678: { |
3679: ii = A_offd_j[jj]; |
3680: res -= A_offd_data[jj] * Vext_data[ii]; |
3681: } |
3682: u_data[i] += (relax_weight*res)/l1_norms[i]; |
0x124e0 PUSH %RBP |
0x124e1 MOV %RSP,%RBP |
0x124e4 PUSH %R15 |
0x124e6 PUSH %R14 |
0x124e8 PUSH %R13 |
0x124ea MOV %RDI,%R13 |
0x124ed PUSH %R12 |
0x124ef PUSH %RBX |
0x124f0 AND $-0x20,%RSP |
0x124f4 SUB $0x40,%RSP |
0x124f8 CALL c110 <omp_get_num_threads@plt> |
0x124fd MOV %EAX,%EBX |
0x124ff CALL c950 <omp_get_thread_num@plt> |
0x12504 MOVSXD %EBX,%RSI |
0x12507 MOVSXD %EAX,%RCX |
0x1250a MOV 0x40(%R13),%RAX |
0x1250e CQTO |
0x12510 IDIV %RSI |
0x12513 CMP %RDX,%RCX |
0x12516 JL 12b30 |
0x1251c IMUL %RAX,%RCX |
0x12520 ADD %RCX,%RDX |
0x12523 ADD %RDX,%RAX |
0x12526 MOV %RAX,0x28(%RSP) |
0x1252b CMP %RAX,%RDX |
0x1252e JGE 12b1f |
0x12534 MOV 0x30(%R13),%R11 |
0x12538 MOV 0x50(%R13),%RDI |
0x1253c VPCMPEQD %XMM4,%XMM4,%XMM4 |
0x12540 VPCMPEQD %YMM2,%YMM2,%YMM2 |
0x12544 MOV 0x48(%R13),%R10 |
0x12548 MOV 0x28(%R13),%R14 |
0x1254c MOV 0x18(%R13),%R15 |
0x12550 VMOVSD 0x68(%R13),%XMM3 |
0x12556 MOV %R11,0x38(%RSP) |
0x1255b MOV 0x60(%R13),%R9 |
0x1255f MOV 0x58(%R13),%R8 |
0x12563 MOV %RDI,0x20(%RSP) |
0x12568 MOV 0x38(%R13),%R12 |
0x1256c MOV 0x20(%R13),%RBX |
0x12570 MOV %R10,0x18(%RSP) |
0x12575 MOV 0x10(%R13),%R11 |
0x12579 VMOVSD 0x8(%R13),%XMM5 |
0x1257f MOV %R14,0x10(%RSP) |
0x12584 MOV (%R13),%R13 |
0x12588 MOV %R15,0x30(%RSP) |
0x1258d MOV %R13,0x8(%RSP) |
0x12592 JMP 125a6 |
0x12594 NOPL (%RAX) |
(44) 0x12598 INC %RDX |
(44) 0x1259b CMP %RDX,0x28(%RSP) |
(44) 0x125a0 JE 12b1c |
(44) 0x125a6 MOV 0x30(%RSP),%RCX |
(44) 0x125ab MOV (%RCX,%RDX,8),%RAX |
(44) 0x125af LEA (,%RAX,8),%RSI |
(44) 0x125b7 MOV %RAX,%R14 |
(44) 0x125ba LEA (%R11,%RSI,1),%RDI |
(44) 0x125be VCOMISD (%RDI),%XMM3 |
(44) 0x125c2 JE 12598 |
(44) 0x125c4 MOV 0x20(%RSP),%R10 |
(44) 0x125c9 MOV 0x30(%RSP),%R15 |
(44) 0x125ce VMOVSD (%R10,%RDX,8),%XMM6 |
(44) 0x125d4 MOV 0x8(%R15,%RDX,8),%R10 |
(44) 0x125d9 CMP %R10,%RAX |
(44) 0x125dc JGE 12b90 |
(44) 0x125e2 SUB %RAX,%R10 |
(44) 0x125e5 LEA -0x1(%R10),%R13 |
(44) 0x125e9 CMP $0x2,%R13 |
(44) 0x125ed JBE 12baa |
(44) 0x125f3 MOV %R10,%R15 |
(44) 0x125f6 ADD %RBX,%RSI |
(44) 0x125f9 VXORPD %XMM0,%XMM0,%XMM0 |
(44) 0x125fd XOR %ECX,%ECX |
(44) 0x125ff SHR $0x2,%R15 |
(44) 0x12603 SAL $0x5,%R15 |
(44) 0x12607 LEA -0x20(%R15),%R13 |
(44) 0x1260b SHR $0x5,%R13 |
(44) 0x1260f INC %R13 |
(44) 0x12612 AND $0x7,%R13D |
(44) 0x12616 JE 126eb |
(44) 0x1261c CMP $0x1,%R13 |
(44) 0x12620 JE 126c9 |
(44) 0x12626 CMP $0x2,%R13 |
(44) 0x1262a JE 126b0 |
(44) 0x12630 CMP $0x3,%R13 |
(44) 0x12634 JE 12697 |
(44) 0x12636 CMP $0x4,%R13 |
(44) 0x1263a JE 1267e |
(44) 0x1263c CMP $0x5,%R13 |
(44) 0x12640 JE 12665 |
(44) 0x12642 CMP $0x6,%R13 |
(44) 0x12646 JNE 12b40 |
(44) 0x1264c VMOVDQU (%RSI,%RCX,1),%YMM9 |
(44) 0x12651 VMOVAPD %YMM2,%YMM10 |
(44) 0x12655 VGATHERQPD %YMM10,(%R8,%YMM9,8),%YMM12 |
(44) 0x1265b VFNMADD231PD (%RDI,%RCX,1),%YMM12,%YMM0 |
(44) 0x12661 ADD $0x20,%RCX |
(44) 0x12665 VMOVDQU (%RSI,%RCX,1),%YMM13 |
(44) 0x1266a VMOVAPD %YMM2,%YMM14 |
(44) 0x1266e VGATHERQPD %YMM14,(%R8,%YMM13,8),%YMM15 |
(44) 0x12674 VFNMADD231PD (%RDI,%RCX,1),%YMM15,%YMM0 |
(44) 0x1267a ADD $0x20,%RCX |
(44) 0x1267e VMOVDQU (%RSI,%RCX,1),%YMM8 |
(44) 0x12683 VMOVAPD %YMM2,%YMM7 |
(44) 0x12687 VGATHERQPD %YMM7,(%R8,%YMM8,8),%YMM11 |
(44) 0x1268d VFNMADD231PD (%RDI,%RCX,1),%YMM11,%YMM0 |
(44) 0x12693 ADD $0x20,%RCX |
(44) 0x12697 VMOVDQU (%RSI,%RCX,1),%YMM9 |
(44) 0x1269c VMOVAPD %YMM2,%YMM10 |
(44) 0x126a0 VGATHERQPD %YMM10,(%R8,%YMM9,8),%YMM1 |
(44) 0x126a6 VFNMADD231PD (%RDI,%RCX,1),%YMM1,%YMM0 |
(44) 0x126ac ADD $0x20,%RCX |
(44) 0x126b0 VMOVDQU (%RSI,%RCX,1),%YMM14 |
(44) 0x126b5 VMOVAPD %YMM2,%YMM12 |
(44) 0x126b9 VGATHERQPD %YMM12,(%R8,%YMM14,8),%YMM13 |
(44) 0x126bf VFNMADD231PD (%RDI,%RCX,1),%YMM13,%YMM0 |
(44) 0x126c5 ADD $0x20,%RCX |
(44) 0x126c9 VMOVDQU (%RSI,%RCX,1),%YMM7 |
(44) 0x126ce VMOVAPD %YMM2,%YMM15 |
(44) 0x126d2 VGATHERQPD %YMM15,(%R8,%YMM7,8),%YMM8 |
(44) 0x126d8 VFNMADD231PD (%RDI,%RCX,1),%YMM8,%YMM0 |
(44) 0x126de ADD $0x20,%RCX |
(44) 0x126e2 CMP %RCX,%R15 |
(44) 0x126e5 JE 127c9 |
(46) 0x126eb VMOVDQU (%RSI,%RCX,1),%YMM10 |
(46) 0x126f0 VMOVDQU 0x20(%RSI,%RCX,1),%YMM12 |
(46) 0x126f6 VMOVAPD %YMM2,%YMM11 |
(46) 0x126fa VMOVAPD %YMM2,%YMM1 |
(46) 0x126fe VMOVDQU 0x40(%RSI,%RCX,1),%YMM13 |
(46) 0x12704 VMOVDQU 0x60(%RSI,%RCX,1),%YMM8 |
(46) 0x1270a VMOVAPD %YMM2,%YMM14 |
(46) 0x1270e VMOVAPD %YMM2,%YMM7 |
(46) 0x12712 VGATHERQPD %YMM11,(%R8,%YMM10,8),%YMM9 |
(46) 0x12718 VFNMADD132PD (%RDI,%RCX,1),%YMM0,%YMM9 |
(46) 0x1271e VGATHERQPD %YMM1,(%R8,%YMM12,8),%YMM0 |
(46) 0x12724 VMOVDQU 0x80(%RSI,%RCX,1),%YMM10 |
(46) 0x1272d VFNMADD132PD 0x20(%RDI,%RCX,1),%YMM9,%YMM0 |
(46) 0x12734 VMOVAPD %YMM2,%YMM9 |
(46) 0x12738 VMOVAPD %YMM2,%YMM12 |
(46) 0x1273c VGATHERQPD %YMM14,(%R8,%YMM13,8),%YMM15 |
(46) 0x12742 VFNMADD132PD 0x40(%RDI,%RCX,1),%YMM0,%YMM15 |
(46) 0x12749 VGATHERQPD %YMM7,(%R8,%YMM8,8),%YMM11 |
(46) 0x1274f VMOVDQU 0xa0(%RSI,%RCX,1),%YMM14 |
(46) 0x12758 VFNMADD132PD 0x60(%RDI,%RCX,1),%YMM15,%YMM11 |
(46) 0x1275f VMOVAPD %YMM2,%YMM15 |
(46) 0x12763 VMOVAPD %YMM2,%YMM7 |
(46) 0x12767 VMOVDQU 0xc0(%RSI,%RCX,1),%YMM13 |
(46) 0x12770 VGATHERQPD %YMM9,(%R8,%YMM10,8),%YMM1 |
(46) 0x12776 VFNMADD132PD 0x80(%RDI,%RCX,1),%YMM11,%YMM1 |
(46) 0x12780 VGATHERQPD %YMM12,(%R8,%YMM14,8),%YMM0 |
(46) 0x12786 VMOVDQU 0xe0(%RSI,%RCX,1),%YMM11 |
(46) 0x1278f VFNMADD132PD 0xa0(%RDI,%RCX,1),%YMM1,%YMM0 |
(46) 0x12799 VGATHERQPD %YMM15,(%R8,%YMM13,8),%YMM8 |
(46) 0x1279f VFNMADD132PD 0xc0(%RDI,%RCX,1),%YMM0,%YMM8 |
(46) 0x127a9 VGATHERQPD %YMM7,(%R8,%YMM11,8),%YMM0 |
(46) 0x127af VFNMADD132PD 0xe0(%RDI,%RCX,1),%YMM8,%YMM0 |
(46) 0x127b9 ADD $0x100,%RCX |
(46) 0x127c0 CMP %RCX,%R15 |
(46) 0x127c3 JNE 126eb |
(44) 0x127c9 VEXTRACTF128 $0x1,%YMM0,%XMM9 |
(44) 0x127cf VADDPD %XMM0,%XMM9,%XMM10 |
(44) 0x127d3 VUNPCKHPD %XMM10,%XMM10,%XMM1 |
(44) 0x127d8 VADDPD %XMM10,%XMM1,%XMM12 |
(44) 0x127dd VADDSD %XMM12,%XMM6,%XMM1 |
(44) 0x127e2 TEST $0x3,%R10B |
(44) 0x127e6 JE 12843 |
(44) 0x127e8 MOV %R10,%RCX |
(44) 0x127eb VADDPD %XMM9,%XMM0,%XMM7 |
(44) 0x127f0 AND $-0x4,%RCX |
(44) 0x127f4 ADD %RCX,%RAX |
(44) 0x127f7 SUB %RCX,%R10 |
(44) 0x127fa CMP $0x1,%R10 |
(44) 0x127fe JE 12833 |
(44) 0x12800 ADD %R14,%RCX |
(44) 0x12803 VMOVAPD %XMM4,%XMM14 |
(44) 0x12807 VMOVDQU (%RBX,%RCX,8),%XMM15 |
(44) 0x1280c VGATHERQPD %XMM14,(%R8,%XMM15,8),%XMM13 |
(44) 0x12812 VFNMADD132PD (%R11,%RCX,8),%XMM7,%XMM13 |
(44) 0x12818 VUNPCKHPD %XMM13,%XMM13,%XMM8 |
(44) 0x1281d VADDPD %XMM13,%XMM8,%XMM7 |
(44) 0x12822 VADDSD %XMM7,%XMM6,%XMM1 |
(44) 0x12826 TEST $0x1,%R10B |
(44) 0x1282a JE 12843 |
(44) 0x1282c AND $-0x2,%R10 |
(44) 0x12830 ADD %R10,%RAX |
(44) 0x12833 MOV (%RBX,%RAX,8),%R14 |
(44) 0x12837 VMOVSD (%R11,%RAX,8),%XMM6 |
(44) 0x1283d VFNMADD231SD (%R8,%R14,8),%XMM6,%XMM1 |
(44) 0x12843 MOV 0x10(%RSP),%RAX |
(44) 0x12848 MOV (%RAX,%RDX,8),%RCX |
(44) 0x1284c MOV 0x8(%RAX,%RDX,8),%RSI |
(44) 0x12851 CMP %RSI,%RCX |
(44) 0x12854 JGE 12b80 |
(44) 0x1285a SUB %RCX,%RSI |
(44) 0x1285d MOV %RCX,%R15 |
(44) 0x12860 LEA -0x1(%RSI),%RDI |
(44) 0x12864 CMP $0x2,%RDI |
(44) 0x12868 JBE 12b99 |
(44) 0x1286e MOV 0x38(%RSP),%R14 |
(44) 0x12873 LEA (,%RCX,8),%RDI |
(44) 0x1287b XOR %EAX,%EAX |
(44) 0x1287d VXORPD %XMM6,%XMM6,%XMM6 |
(44) 0x12881 LEA (%R12,%RDI,1),%R13 |
(44) 0x12885 ADD %R14,%RDI |
(44) 0x12888 MOV %RSI,%R14 |
(44) 0x1288b SHR $0x2,%R14 |
(44) 0x1288f SAL $0x5,%R14 |
(44) 0x12893 LEA -0x20(%R14),%R10 |
(44) 0x12897 SHR $0x5,%R10 |
(44) 0x1289b INC %R10 |
(44) 0x1289e AND $0x7,%R10D |
(44) 0x128a2 JE 12983 |
(44) 0x128a8 CMP $0x1,%R10 |
(44) 0x128ac JE 1295f |
(44) 0x128b2 CMP $0x2,%R10 |
(44) 0x128b6 JE 12944 |
(44) 0x128bc CMP $0x3,%R10 |
(44) 0x128c0 JE 12929 |
(44) 0x128c2 CMP $0x4,%R10 |
(44) 0x128c6 JE 1290e |
(44) 0x128c8 CMP $0x5,%R10 |
(44) 0x128cc JE 128f3 |
(44) 0x128ce CMP $0x6,%R10 |
(44) 0x128d2 JNE 12b60 |
(44) 0x128d8 VMOVDQU (%R13,%RAX,1),%YMM14 |
(44) 0x128df VMOVAPD %YMM2,%YMM12 |
(44) 0x128e3 VGATHERQPD %YMM12,(%R9,%YMM14,8),%YMM15 |
(44) 0x128e9 VFNMADD231PD (%RDI,%RAX,1),%YMM15,%YMM6 |
(44) 0x128ef ADD $0x20,%RAX |
(44) 0x128f3 VMOVDQU (%R13,%RAX,1),%YMM8 |
(44) 0x128fa VMOVAPD %YMM2,%YMM13 |
(44) 0x128fe VGATHERQPD %YMM13,(%R9,%YMM8,8),%YMM7 |
(44) 0x12904 VFNMADD231PD (%RDI,%RAX,1),%YMM7,%YMM6 |
(44) 0x1290a ADD $0x20,%RAX |
(44) 0x1290e VMOVDQU (%R13,%RAX,1),%YMM9 |
(44) 0x12915 VMOVAPD %YMM2,%YMM11 |
(44) 0x12919 VGATHERQPD %YMM11,(%R9,%YMM9,8),%YMM10 |
(44) 0x1291f VFNMADD231PD (%RDI,%RAX,1),%YMM10,%YMM6 |
(44) 0x12925 ADD $0x20,%RAX |
(44) 0x12929 VMOVDQU (%R13,%RAX,1),%YMM14 |
(44) 0x12930 VMOVAPD %YMM2,%YMM12 |
(44) 0x12934 VGATHERQPD %YMM12,(%R9,%YMM14,8),%YMM0 |
(44) 0x1293a VFNMADD231PD (%RDI,%RAX,1),%YMM0,%YMM6 |
(44) 0x12940 ADD $0x20,%RAX |
(44) 0x12944 VMOVDQU (%R13,%RAX,1),%YMM13 |
(44) 0x1294b VMOVAPD %YMM2,%YMM15 |
(44) 0x1294f VGATHERQPD %YMM15,(%R9,%YMM13,8),%YMM8 |
(44) 0x12955 VFNMADD231PD (%RDI,%RAX,1),%YMM8,%YMM6 |
(44) 0x1295b ADD $0x20,%RAX |
(44) 0x1295f VMOVDQU (%R13,%RAX,1),%YMM11 |
(44) 0x12966 VMOVAPD %YMM2,%YMM7 |
(44) 0x1296a VGATHERQPD %YMM7,(%R9,%YMM11,8),%YMM9 |
(44) 0x12970 VFNMADD231PD (%RDI,%RAX,1),%YMM9,%YMM6 |
(44) 0x12976 ADD $0x20,%RAX |
(44) 0x1297a CMP %RAX,%R14 |
(44) 0x1297d JE 12a69 |
(45) 0x12983 VMOVDQU (%R13,%RAX,1),%YMM12 |
(45) 0x1298a VMOVAPD %YMM2,%YMM10 |
(45) 0x1298e VMOVAPD %YMM2,%YMM13 |
(45) 0x12992 VMOVDQU 0x20(%R13,%RAX,1),%YMM15 |
(45) 0x12999 VMOVDQU 0x40(%R13,%RAX,1),%YMM7 |
(45) 0x129a0 VMOVAPD %YMM2,%YMM8 |
(45) 0x129a4 VMOVAPD %YMM2,%YMM9 |
(45) 0x129a8 VGATHERQPD %YMM10,(%R9,%YMM12,8),%YMM14 |
(45) 0x129ae VFNMADD231PD (%RDI,%RAX,1),%YMM14,%YMM6 |
(45) 0x129b4 VGATHERQPD %YMM13,(%R9,%YMM15,8),%YMM0 |
(45) 0x129ba VMOVDQU 0x60(%R13,%RAX,1),%YMM10 |
(45) 0x129c1 VFNMADD231PD 0x20(%RDI,%RAX,1),%YMM0,%YMM6 |
(45) 0x129c8 VMOVAPD %YMM2,%YMM12 |
(45) 0x129cc VMOVAPD %YMM2,%YMM15 |
(45) 0x129d0 VMOVDQU 0x80(%R13,%RAX,1),%YMM14 |
(45) 0x129da VGATHERQPD %YMM8,(%R9,%YMM7,8),%YMM11 |
(45) 0x129e0 VMOVAPD %YMM2,%YMM7 |
(45) 0x129e4 VMOVDQU 0xa0(%R13,%RAX,1),%YMM0 |
(45) 0x129ee VFNMADD132PD 0x40(%RDI,%RAX,1),%YMM6,%YMM11 |
(45) 0x129f5 VGATHERQPD %YMM9,(%R9,%YMM10,8),%YMM6 |
(45) 0x129fb VMOVAPD %YMM2,%YMM10 |
(45) 0x129ff VFNMADD132PD 0x60(%RDI,%RAX,1),%YMM11,%YMM6 |
(45) 0x12a06 VGATHERQPD %YMM12,(%R9,%YMM14,8),%YMM13 |
(45) 0x12a0c VMOVDQU 0xc0(%R13,%RAX,1),%YMM11 |
(45) 0x12a16 VGATHERQPD %YMM15,(%R9,%YMM0,8),%YMM8 |
(45) 0x12a1c VFNMADD132PD 0x80(%RDI,%RAX,1),%YMM6,%YMM13 |
(45) 0x12a26 VMOVDQU 0xe0(%R13,%RAX,1),%YMM12 |
(45) 0x12a30 VFNMADD132PD 0xa0(%RDI,%RAX,1),%YMM13,%YMM8 |
(45) 0x12a3a VGATHERQPD %YMM7,(%R9,%YMM11,8),%YMM9 |
(45) 0x12a40 VFNMADD132PD 0xc0(%RDI,%RAX,1),%YMM8,%YMM9 |
(45) 0x12a4a VGATHERQPD %YMM10,(%R9,%YMM12,8),%YMM6 |
(45) 0x12a50 VFNMADD132PD 0xe0(%RDI,%RAX,1),%YMM9,%YMM6 |
(45) 0x12a5a ADD $0x100,%RAX |
(45) 0x12a60 CMP %RAX,%R14 |
(45) 0x12a63 JNE 12983 |
(44) 0x12a69 VEXTRACTF128 $0x1,%YMM6,%XMM14 |
(44) 0x12a6f VADDPD %XMM6,%XMM14,%XMM13 |
(44) 0x12a73 VUNPCKHPD %XMM13,%XMM13,%XMM15 |
(44) 0x12a78 VADDPD %XMM13,%XMM15,%XMM0 |
(44) 0x12a7d VADDSD %XMM0,%XMM1,%XMM12 |
(44) 0x12a81 TEST $0x3,%SIL |
(44) 0x12a85 JE 12aec |
(44) 0x12a87 MOV %RSI,%R10 |
(44) 0x12a8a VADDPD %XMM6,%XMM14,%XMM11 |
(44) 0x12a8e AND $-0x4,%R10 |
(44) 0x12a92 ADD %R10,%RCX |
(44) 0x12a95 SUB %R10,%RSI |
(44) 0x12a98 CMP $0x1,%RSI |
(44) 0x12a9c JE 12ad7 |
(44) 0x12a9e ADD %R15,%R10 |
(44) 0x12aa1 MOV 0x38(%RSP),%R15 |
(44) 0x12aa6 VMOVAPD %XMM4,%XMM8 |
(44) 0x12aaa VMOVDQU (%R12,%R10,8),%XMM7 |
(44) 0x12ab0 VGATHERQPD %XMM8,(%R9,%XMM7,8),%XMM9 |
(44) 0x12ab6 VFNMADD132PD (%R15,%R10,8),%XMM11,%XMM9 |
(44) 0x12abc VUNPCKHPD %XMM9,%XMM9,%XMM11 |
(44) 0x12ac1 VADDPD %XMM9,%XMM11,%XMM10 |
(44) 0x12ac6 VADDSD %XMM1,%XMM10,%XMM12 |
(44) 0x12aca TEST $0x1,%SIL |
(44) 0x12ace JE 12aec |
(44) 0x12ad0 AND $-0x2,%RSI |
(44) 0x12ad4 ADD %RSI,%RCX |
(44) 0x12ad7 MOV (%R12,%RCX,8),%RSI |
(44) 0x12adb MOV 0x38(%RSP),%RDI |
(44) 0x12ae0 VMOVSD (%R9,%RSI,8),%XMM1 |
(44) 0x12ae6 VFNMADD231SD (%RDI,%RCX,8),%XMM1,%XMM12 |
(44) 0x12aec MOV 0x18(%RSP),%R13 |
(44) 0x12af1 MOV 0x8(%RSP),%RCX |
(44) 0x12af6 VMULSD %XMM12,%XMM5,%XMM6 |
(44) 0x12afb VDIVSD (%RCX,%RDX,8),%XMM6,%XMM14 |
(44) 0x12b00 VADDSD (%R13,%RDX,8),%XMM14,%XMM13 |
(44) 0x12b07 VMOVSD %XMM13,(%R13,%RDX,8) |
(44) 0x12b0e INC %RDX |
(44) 0x12b11 CMP %RDX,0x28(%RSP) |
(44) 0x12b16 JNE 125a6 |
0x12b1c VZEROUPPER |
0x12b1f LEA -0x28(%RBP),%RSP |
0x12b23 POP %RBX |
0x12b24 POP %R12 |
0x12b26 POP %R13 |
0x12b28 POP %R14 |
0x12b2a POP %R15 |
0x12b2c POP %RBP |
0x12b2d RET |
0x12b2e XCHG %AX,%AX |
0x12b30 INC %RAX |
0x12b33 XOR %EDX,%EDX |
0x12b35 JMP 1251c |
0x12b3a NOPW (%RAX,%RAX,1) |
(44) 0x12b40 VMOVDQU (%RSI),%YMM11 |
(44) 0x12b44 VMOVAPD %YMM2,%YMM8 |
(44) 0x12b48 MOV $0x20,%ECX |
(44) 0x12b4d VGATHERQPD %YMM8,(%R8,%YMM11,8),%YMM1 |
(44) 0x12b53 VFNMADD231PD (%RDI),%YMM1,%YMM0 |
(44) 0x12b58 JMP 1264c |
0x12b5d NOPL (%RAX) |
(44) 0x12b60 VMOVDQU (%R13),%YMM10 |
(44) 0x12b66 VMOVAPD %YMM2,%YMM9 |
(44) 0x12b6a MOV $0x20,%EAX |
(44) 0x12b6f VGATHERQPD %YMM9,(%R9,%YMM10,8),%YMM0 |
(44) 0x12b75 VFNMADD231PD (%RDI),%YMM0,%YMM6 |
(44) 0x12b7a JMP 128d8 |
0x12b7f NOP |
(44) 0x12b80 VMOVSD %XMM1,%XMM1,%XMM12 |
(44) 0x12b84 JMP 12aec |
0x12b89 NOPL (%RAX) |
(44) 0x12b90 VMOVSD %XMM6,%XMM6,%XMM1 |
(44) 0x12b94 JMP 12843 |
(44) 0x12b99 VMOVSD %XMM1,%XMM1,%XMM12 |
(44) 0x12b9d VXORPD %XMM11,%XMM11,%XMM11 |
(44) 0x12ba2 XOR %R10D,%R10D |
(44) 0x12ba5 JMP 12a95 |
(44) 0x12baa VMOVSD %XMM6,%XMM6,%XMM1 |
(44) 0x12bae VXORPD %XMM7,%XMM7,%XMM7 |
(44) 0x12bb2 XOR %ECX,%ECX |
(44) 0x12bb4 JMP 127f7 |
0x12bb9 NOPL (%RAX) |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
○95.82 | gomp_thread_start | team.c:130 | libgomp.so.1.0.0 |
○4.18 | GOMP_parallel | libgomp.h:985 | libgomp.so.1.0.0 |
Path / |
Source file and lines | ams.c:3662-3682 |
Module | libparcsr_ls.so |
nb instructions | 67 |
nb uops | 62 |
loop length | 238 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 3 |
used ymm registers | 1 |
used zmm registers | 0 |
nb stack references | 8 |
micro-operation queue | 10.33 cycles |
front end | 10.33 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 4.50 | 3.75 | 3.75 | 3.50 | 4.50 | 7.67 | 7.67 | 7.67 | 0.50 | 0.50 | 0.50 | 0.50 | 0.00 | 0.00 |
cycles | 4.50 | 3.75 | 3.75 | 3.50 | 4.50 | 7.67 | 7.67 | 7.67 | 0.50 | 0.50 | 0.50 | 0.50 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | 7.00-12.00 |
Front-end | 10.33 |
Dispatch | 7.67 |
DIV/SQRT | 7.00-12.00 |
Overall L1 | 10.33-12.00 |
all | 10% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 27% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 9% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 30% |
all | 14% |
load | 12% |
store | 12% |
mul | 12% |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 17% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 14% |
load | 12% |
store | 12% |
mul | 12% |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 12% |
other | 17% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput | Vectorization |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
MOV %RDI,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
AND $-0x20,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
SUB $0x40,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
CALL c110 <omp_get_num_threads@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
MOV %EAX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | scal (6.3%) |
CALL c950 <omp_get_thread_num@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
MOVSXD %EBX,%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
MOVSXD %EAX,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
MOV 0x40(%R13),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
CQTO | | | | | | | | | | | | | | | | | | scal (12.5%) |
IDIV %RSI | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 9-19 | 7-12 | scal (12.5%) |
CMP %RDX,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JL 12b30 <hypre_ParCSRRelaxThreads._omp_fn.1+0x650> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
IMUL %RAX,%RCX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | scal (12.5%) |
ADD %RCX,%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
ADD %RDX,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV %RAX,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
CMP %RAX,%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JGE 12b1f <hypre_ParCSRRelaxThreads._omp_fn.1+0x63f> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV 0x30(%R13),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
MOV 0x50(%R13),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
VPCMPEQD %XMM4,%XMM4,%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | vect (25.0%) |
VPCMPEQD %YMM2,%YMM2,%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | vect (50.0%) |
MOV 0x48(%R13),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
MOV 0x28(%R13),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
MOV 0x18(%R13),%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
VMOVSD 0x68(%R13),%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
MOV %R11,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV 0x60(%R13),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
MOV 0x58(%R13),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
MOV %RDI,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV 0x38(%R13),%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
MOV 0x20(%R13),%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
MOV %R10,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV 0x10(%R13),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
VMOVSD 0x8(%R13),%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
MOV %R14,0x10(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV (%R13),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV %R15,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV %R13,0x8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
JMP 125a6 <hypre_ParCSRRelaxThreads._omp_fn.1+0xc6> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | vect (25.0%) |
LEA -0x28(%RBP),%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
XCHG %AX,%AX | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
INC %RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | scal (6.3%) |
JMP 1251c <hypre_ParCSRRelaxThreads._omp_fn.1+0x3c> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
NOPW (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
NOP | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
Source file and lines | ams.c:3662-3682 |
Module | libparcsr_ls.so |
nb instructions | 67 |
nb uops | 62 |
loop length | 238 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 3 |
used ymm registers | 1 |
used zmm registers | 0 |
nb stack references | 8 |
micro-operation queue | 10.33 cycles |
front end | 10.33 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 4.50 | 3.75 | 3.75 | 3.50 | 4.50 | 7.67 | 7.67 | 7.67 | 0.50 | 0.50 | 0.50 | 0.50 | 0.00 | 0.00 |
cycles | 4.50 | 3.75 | 3.75 | 3.50 | 4.50 | 7.67 | 7.67 | 7.67 | 0.50 | 0.50 | 0.50 | 0.50 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | 7.00-12.00 |
Front-end | 10.33 |
Dispatch | 7.67 |
DIV/SQRT | 7.00-12.00 |
Overall L1 | 10.33-12.00 |
all | 10% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 27% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 9% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 30% |
all | 14% |
load | 12% |
store | 12% |
mul | 12% |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 17% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 14% |
load | 12% |
store | 12% |
mul | 12% |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 12% |
other | 17% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput | Vectorization |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
MOV %RDI,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
AND $-0x20,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
SUB $0x40,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
CALL c110 <omp_get_num_threads@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
MOV %EAX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | scal (6.3%) |
CALL c950 <omp_get_thread_num@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
MOVSXD %EBX,%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
MOVSXD %EAX,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
MOV 0x40(%R13),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
CQTO | | | | | | | | | | | | | | | | | | scal (12.5%) |
IDIV %RSI | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 9-19 | 7-12 | scal (12.5%) |
CMP %RDX,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JL 12b30 <hypre_ParCSRRelaxThreads._omp_fn.1+0x650> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
IMUL %RAX,%RCX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | scal (12.5%) |
ADD %RCX,%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
ADD %RDX,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV %RAX,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
CMP %RAX,%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JGE 12b1f <hypre_ParCSRRelaxThreads._omp_fn.1+0x63f> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV 0x30(%R13),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
MOV 0x50(%R13),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
VPCMPEQD %XMM4,%XMM4,%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | vect (25.0%) |
VPCMPEQD %YMM2,%YMM2,%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | vect (50.0%) |
MOV 0x48(%R13),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
MOV 0x28(%R13),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
MOV 0x18(%R13),%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
VMOVSD 0x68(%R13),%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
MOV %R11,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV 0x60(%R13),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
MOV 0x58(%R13),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
MOV %RDI,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV 0x38(%R13),%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
MOV 0x20(%R13),%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
MOV %R10,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV 0x10(%R13),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
VMOVSD 0x8(%R13),%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
MOV %R14,0x10(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV (%R13),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV %R15,0x30(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV %R13,0x8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
JMP 125a6 <hypre_ParCSRRelaxThreads._omp_fn.1+0xc6> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | vect (25.0%) |
LEA -0x28(%RBP),%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
RET | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
XCHG %AX,%AX | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
INC %RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | scal (6.3%) |
JMP 1251c <hypre_ParCSRRelaxThreads._omp_fn.1+0x3c> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
NOPW (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
NOP | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
Name | Coverage (%) | Time (s) |
---|---|---|
▼hypre_ParCSRRelaxThreads._omp_fn.1– | 48.50 | 36.04 |
▼Loop 44 - ams.c:3662-3682 - libparcsr_ls.so– | 0.91 | 0.61 |
○Loop 45 - ams.c:3677-3680 - libparcsr_ls.so | 0.01 | 0.01 |
○Loop 46 - ams.c:3672-3675 - libparcsr_ls.so | 0.00 | 0.00 |