Loop Id: 44 | Module: libparcsr_ls.so | Source: ams.c:3662-3682 [...] | Coverage: 48.07% |
---|
Loop Id: 44 | Module: libparcsr_ls.so | Source: ams.c:3662-3682 [...] | Coverage: 48.07% |
---|
0x12598 INC %RDX |
0x1259b CMP %RDX,0x28(%RSP) |
0x125a0 JE 12b1c |
0x125a6 MOV 0x30(%RSP),%RCX |
0x125ab MOV (%RCX,%RDX,8),%RAX |
0x125af LEA (,%RAX,8),%RSI |
0x125b7 MOV %RAX,%R14 |
0x125ba LEA (%R11,%RSI,1),%RDI |
0x125be VCOMISD (%RDI),%XMM3 |
0x125c2 JE 12598 |
0x125c4 MOV 0x20(%RSP),%R10 |
0x125c9 MOV 0x30(%RSP),%R15 |
0x125ce VMOVSD (%R10,%RDX,8),%XMM6 |
0x125d4 MOV 0x8(%R15,%RDX,8),%R10 |
0x125d9 CMP %R10,%RAX |
0x125dc JGE 12b90 |
0x125e2 SUB %RAX,%R10 |
0x125e5 LEA -0x1(%R10),%R13 |
0x125e9 CMP $0x2,%R13 |
0x125ed JBE 12baa |
0x125f3 MOV %R10,%R15 |
0x125f6 ADD %RBX,%RSI |
0x125f9 VXORPD %XMM0,%XMM0,%XMM0 |
0x125fd XOR %ECX,%ECX |
0x125ff SHR $0x2,%R15 |
0x12603 SAL $0x5,%R15 |
0x12607 LEA -0x20(%R15),%R13 |
0x1260b SHR $0x5,%R13 |
0x1260f INC %R13 |
0x12612 AND $0x7,%R13D |
0x12616 JE 126eb |
0x1261c CMP $0x1,%R13 |
0x12620 JE 126c9 |
0x12626 CMP $0x2,%R13 |
0x1262a JE 126b0 |
0x12630 CMP $0x3,%R13 |
0x12634 JE 12697 |
0x12636 CMP $0x4,%R13 |
0x1263a JE 1267e |
0x1263c CMP $0x5,%R13 |
0x12640 JE 12665 |
0x12642 CMP $0x6,%R13 |
0x12646 JNE 12b40 |
0x1264c VMOVDQU (%RSI,%RCX,1),%YMM9 |
0x12651 VMOVAPD %YMM2,%YMM10 |
0x12655 VGATHERQPD %YMM10,(%R8,%YMM9,8),%YMM12 |
0x1265b VFNMADD231PD (%RDI,%RCX,1),%YMM12,%YMM0 |
0x12661 ADD $0x20,%RCX |
0x12665 VMOVDQU (%RSI,%RCX,1),%YMM13 |
0x1266a VMOVAPD %YMM2,%YMM14 |
0x1266e VGATHERQPD %YMM14,(%R8,%YMM13,8),%YMM15 |
0x12674 VFNMADD231PD (%RDI,%RCX,1),%YMM15,%YMM0 |
0x1267a ADD $0x20,%RCX |
0x1267e VMOVDQU (%RSI,%RCX,1),%YMM8 |
0x12683 VMOVAPD %YMM2,%YMM7 |
0x12687 VGATHERQPD %YMM7,(%R8,%YMM8,8),%YMM11 |
0x1268d VFNMADD231PD (%RDI,%RCX,1),%YMM11,%YMM0 |
0x12693 ADD $0x20,%RCX |
0x12697 VMOVDQU (%RSI,%RCX,1),%YMM9 |
0x1269c VMOVAPD %YMM2,%YMM10 |
0x126a0 VGATHERQPD %YMM10,(%R8,%YMM9,8),%YMM1 |
0x126a6 VFNMADD231PD (%RDI,%RCX,1),%YMM1,%YMM0 |
0x126ac ADD $0x20,%RCX |
0x126b0 VMOVDQU (%RSI,%RCX,1),%YMM14 |
0x126b5 VMOVAPD %YMM2,%YMM12 |
0x126b9 VGATHERQPD %YMM12,(%R8,%YMM14,8),%YMM13 |
0x126bf VFNMADD231PD (%RDI,%RCX,1),%YMM13,%YMM0 |
0x126c5 ADD $0x20,%RCX |
0x126c9 VMOVDQU (%RSI,%RCX,1),%YMM7 |
0x126ce VMOVAPD %YMM2,%YMM15 |
0x126d2 VGATHERQPD %YMM15,(%R8,%YMM7,8),%YMM8 |
0x126d8 VFNMADD231PD (%RDI,%RCX,1),%YMM8,%YMM0 |
0x126de ADD $0x20,%RCX |
0x126e2 CMP %RCX,%R15 |
0x126e5 JE 127c9 |
(46) 0x126eb VMOVDQU (%RSI,%RCX,1),%YMM10 |
(46) 0x126f0 VMOVDQU 0x20(%RSI,%RCX,1),%YMM12 |
(46) 0x126f6 VMOVAPD %YMM2,%YMM11 |
(46) 0x126fa VMOVAPD %YMM2,%YMM1 |
(46) 0x126fe VMOVDQU 0x40(%RSI,%RCX,1),%YMM13 |
(46) 0x12704 VMOVDQU 0x60(%RSI,%RCX,1),%YMM8 |
(46) 0x1270a VMOVAPD %YMM2,%YMM14 |
(46) 0x1270e VMOVAPD %YMM2,%YMM7 |
(46) 0x12712 VGATHERQPD %YMM11,(%R8,%YMM10,8),%YMM9 |
(46) 0x12718 VFNMADD132PD (%RDI,%RCX,1),%YMM0,%YMM9 |
(46) 0x1271e VGATHERQPD %YMM1,(%R8,%YMM12,8),%YMM0 |
(46) 0x12724 VMOVDQU 0x80(%RSI,%RCX,1),%YMM10 |
(46) 0x1272d VFNMADD132PD 0x20(%RDI,%RCX,1),%YMM9,%YMM0 |
(46) 0x12734 VMOVAPD %YMM2,%YMM9 |
(46) 0x12738 VMOVAPD %YMM2,%YMM12 |
(46) 0x1273c VGATHERQPD %YMM14,(%R8,%YMM13,8),%YMM15 |
(46) 0x12742 VFNMADD132PD 0x40(%RDI,%RCX,1),%YMM0,%YMM15 |
(46) 0x12749 VGATHERQPD %YMM7,(%R8,%YMM8,8),%YMM11 |
(46) 0x1274f VMOVDQU 0xa0(%RSI,%RCX,1),%YMM14 |
(46) 0x12758 VFNMADD132PD 0x60(%RDI,%RCX,1),%YMM15,%YMM11 |
(46) 0x1275f VMOVAPD %YMM2,%YMM15 |
(46) 0x12763 VMOVAPD %YMM2,%YMM7 |
(46) 0x12767 VMOVDQU 0xc0(%RSI,%RCX,1),%YMM13 |
(46) 0x12770 VGATHERQPD %YMM9,(%R8,%YMM10,8),%YMM1 |
(46) 0x12776 VFNMADD132PD 0x80(%RDI,%RCX,1),%YMM11,%YMM1 |
(46) 0x12780 VGATHERQPD %YMM12,(%R8,%YMM14,8),%YMM0 |
(46) 0x12786 VMOVDQU 0xe0(%RSI,%RCX,1),%YMM11 |
(46) 0x1278f VFNMADD132PD 0xa0(%RDI,%RCX,1),%YMM1,%YMM0 |
(46) 0x12799 VGATHERQPD %YMM15,(%R8,%YMM13,8),%YMM8 |
(46) 0x1279f VFNMADD132PD 0xc0(%RDI,%RCX,1),%YMM0,%YMM8 |
(46) 0x127a9 VGATHERQPD %YMM7,(%R8,%YMM11,8),%YMM0 |
(46) 0x127af VFNMADD132PD 0xe0(%RDI,%RCX,1),%YMM8,%YMM0 |
(46) 0x127b9 ADD $0x100,%RCX |
(46) 0x127c0 CMP %RCX,%R15 |
(46) 0x127c3 JNE 126eb |
0x127c9 VEXTRACTF128 $0x1,%YMM0,%XMM9 |
0x127cf VADDPD %XMM0,%XMM9,%XMM10 |
0x127d3 VUNPCKHPD %XMM10,%XMM10,%XMM1 |
0x127d8 VADDPD %XMM10,%XMM1,%XMM12 |
0x127dd VADDSD %XMM12,%XMM6,%XMM1 |
0x127e2 TEST $0x3,%R10B |
0x127e6 JE 12843 |
0x127e8 MOV %R10,%RCX |
0x127eb VADDPD %XMM9,%XMM0,%XMM7 |
0x127f0 AND $-0x4,%RCX |
0x127f4 ADD %RCX,%RAX |
0x127f7 SUB %RCX,%R10 |
0x127fa CMP $0x1,%R10 |
0x127fe JE 12833 |
0x12800 ADD %R14,%RCX |
0x12803 VMOVAPD %XMM4,%XMM14 |
0x12807 VMOVDQU (%RBX,%RCX,8),%XMM15 |
0x1280c VGATHERQPD %XMM14,(%R8,%XMM15,8),%XMM13 |
0x12812 VFNMADD132PD (%R11,%RCX,8),%XMM7,%XMM13 |
0x12818 VUNPCKHPD %XMM13,%XMM13,%XMM8 |
0x1281d VADDPD %XMM13,%XMM8,%XMM7 |
0x12822 VADDSD %XMM7,%XMM6,%XMM1 |
0x12826 TEST $0x1,%R10B |
0x1282a JE 12843 |
0x1282c AND $-0x2,%R10 |
0x12830 ADD %R10,%RAX |
0x12833 MOV (%RBX,%RAX,8),%R14 |
0x12837 VMOVSD (%R11,%RAX,8),%XMM6 |
0x1283d VFNMADD231SD (%R8,%R14,8),%XMM6,%XMM1 |
0x12843 MOV 0x10(%RSP),%RAX |
0x12848 MOV (%RAX,%RDX,8),%RCX |
0x1284c MOV 0x8(%RAX,%RDX,8),%RSI |
0x12851 CMP %RSI,%RCX |
0x12854 JGE 12b80 |
0x1285a SUB %RCX,%RSI |
0x1285d MOV %RCX,%R15 |
0x12860 LEA -0x1(%RSI),%RDI |
0x12864 CMP $0x2,%RDI |
0x12868 JBE 12b99 |
0x1286e MOV 0x38(%RSP),%R14 |
0x12873 LEA (,%RCX,8),%RDI |
0x1287b XOR %EAX,%EAX |
0x1287d VXORPD %XMM6,%XMM6,%XMM6 |
0x12881 LEA (%R12,%RDI,1),%R13 |
0x12885 ADD %R14,%RDI |
0x12888 MOV %RSI,%R14 |
0x1288b SHR $0x2,%R14 |
0x1288f SAL $0x5,%R14 |
0x12893 LEA -0x20(%R14),%R10 |
0x12897 SHR $0x5,%R10 |
0x1289b INC %R10 |
0x1289e AND $0x7,%R10D |
0x128a2 JE 12983 |
0x128a8 CMP $0x1,%R10 |
0x128ac JE 1295f |
0x128b2 CMP $0x2,%R10 |
0x128b6 JE 12944 |
0x128bc CMP $0x3,%R10 |
0x128c0 JE 12929 |
0x128c2 CMP $0x4,%R10 |
0x128c6 JE 1290e |
0x128c8 CMP $0x5,%R10 |
0x128cc JE 128f3 |
0x128ce CMP $0x6,%R10 |
0x128d2 JNE 12b60 |
0x128d8 VMOVDQU (%R13,%RAX,1),%YMM14 |
0x128df VMOVAPD %YMM2,%YMM12 |
0x128e3 VGATHERQPD %YMM12,(%R9,%YMM14,8),%YMM15 |
0x128e9 VFNMADD231PD (%RDI,%RAX,1),%YMM15,%YMM6 |
0x128ef ADD $0x20,%RAX |
0x128f3 VMOVDQU (%R13,%RAX,1),%YMM8 |
0x128fa VMOVAPD %YMM2,%YMM13 |
0x128fe VGATHERQPD %YMM13,(%R9,%YMM8,8),%YMM7 |
0x12904 VFNMADD231PD (%RDI,%RAX,1),%YMM7,%YMM6 |
0x1290a ADD $0x20,%RAX |
0x1290e VMOVDQU (%R13,%RAX,1),%YMM9 |
0x12915 VMOVAPD %YMM2,%YMM11 |
0x12919 VGATHERQPD %YMM11,(%R9,%YMM9,8),%YMM10 |
0x1291f VFNMADD231PD (%RDI,%RAX,1),%YMM10,%YMM6 |
0x12925 ADD $0x20,%RAX |
0x12929 VMOVDQU (%R13,%RAX,1),%YMM14 |
0x12930 VMOVAPD %YMM2,%YMM12 |
0x12934 VGATHERQPD %YMM12,(%R9,%YMM14,8),%YMM0 |
0x1293a VFNMADD231PD (%RDI,%RAX,1),%YMM0,%YMM6 |
0x12940 ADD $0x20,%RAX |
0x12944 VMOVDQU (%R13,%RAX,1),%YMM13 |
0x1294b VMOVAPD %YMM2,%YMM15 |
0x1294f VGATHERQPD %YMM15,(%R9,%YMM13,8),%YMM8 |
0x12955 VFNMADD231PD (%RDI,%RAX,1),%YMM8,%YMM6 |
0x1295b ADD $0x20,%RAX |
0x1295f VMOVDQU (%R13,%RAX,1),%YMM11 |
0x12966 VMOVAPD %YMM2,%YMM7 |
0x1296a VGATHERQPD %YMM7,(%R9,%YMM11,8),%YMM9 |
0x12970 VFNMADD231PD (%RDI,%RAX,1),%YMM9,%YMM6 |
0x12976 ADD $0x20,%RAX |
0x1297a CMP %RAX,%R14 |
0x1297d JE 12a69 |
(45) 0x12983 VMOVDQU (%R13,%RAX,1),%YMM12 |
(45) 0x1298a VMOVAPD %YMM2,%YMM10 |
(45) 0x1298e VMOVAPD %YMM2,%YMM13 |
(45) 0x12992 VMOVDQU 0x20(%R13,%RAX,1),%YMM15 |
(45) 0x12999 VMOVDQU 0x40(%R13,%RAX,1),%YMM7 |
(45) 0x129a0 VMOVAPD %YMM2,%YMM8 |
(45) 0x129a4 VMOVAPD %YMM2,%YMM9 |
(45) 0x129a8 VGATHERQPD %YMM10,(%R9,%YMM12,8),%YMM14 |
(45) 0x129ae VFNMADD231PD (%RDI,%RAX,1),%YMM14,%YMM6 |
(45) 0x129b4 VGATHERQPD %YMM13,(%R9,%YMM15,8),%YMM0 |
(45) 0x129ba VMOVDQU 0x60(%R13,%RAX,1),%YMM10 |
(45) 0x129c1 VFNMADD231PD 0x20(%RDI,%RAX,1),%YMM0,%YMM6 |
(45) 0x129c8 VMOVAPD %YMM2,%YMM12 |
(45) 0x129cc VMOVAPD %YMM2,%YMM15 |
(45) 0x129d0 VMOVDQU 0x80(%R13,%RAX,1),%YMM14 |
(45) 0x129da VGATHERQPD %YMM8,(%R9,%YMM7,8),%YMM11 |
(45) 0x129e0 VMOVAPD %YMM2,%YMM7 |
(45) 0x129e4 VMOVDQU 0xa0(%R13,%RAX,1),%YMM0 |
(45) 0x129ee VFNMADD132PD 0x40(%RDI,%RAX,1),%YMM6,%YMM11 |
(45) 0x129f5 VGATHERQPD %YMM9,(%R9,%YMM10,8),%YMM6 |
(45) 0x129fb VMOVAPD %YMM2,%YMM10 |
(45) 0x129ff VFNMADD132PD 0x60(%RDI,%RAX,1),%YMM11,%YMM6 |
(45) 0x12a06 VGATHERQPD %YMM12,(%R9,%YMM14,8),%YMM13 |
(45) 0x12a0c VMOVDQU 0xc0(%R13,%RAX,1),%YMM11 |
(45) 0x12a16 VGATHERQPD %YMM15,(%R9,%YMM0,8),%YMM8 |
(45) 0x12a1c VFNMADD132PD 0x80(%RDI,%RAX,1),%YMM6,%YMM13 |
(45) 0x12a26 VMOVDQU 0xe0(%R13,%RAX,1),%YMM12 |
(45) 0x12a30 VFNMADD132PD 0xa0(%RDI,%RAX,1),%YMM13,%YMM8 |
(45) 0x12a3a VGATHERQPD %YMM7,(%R9,%YMM11,8),%YMM9 |
(45) 0x12a40 VFNMADD132PD 0xc0(%RDI,%RAX,1),%YMM8,%YMM9 |
(45) 0x12a4a VGATHERQPD %YMM10,(%R9,%YMM12,8),%YMM6 |
(45) 0x12a50 VFNMADD132PD 0xe0(%RDI,%RAX,1),%YMM9,%YMM6 |
(45) 0x12a5a ADD $0x100,%RAX |
(45) 0x12a60 CMP %RAX,%R14 |
(45) 0x12a63 JNE 12983 |
0x12a69 VEXTRACTF128 $0x1,%YMM6,%XMM14 |
0x12a6f VADDPD %XMM6,%XMM14,%XMM13 |
0x12a73 VUNPCKHPD %XMM13,%XMM13,%XMM15 |
0x12a78 VADDPD %XMM13,%XMM15,%XMM0 |
0x12a7d VADDSD %XMM0,%XMM1,%XMM12 |
0x12a81 TEST $0x3,%SIL |
0x12a85 JE 12aec |
0x12a87 MOV %RSI,%R10 |
0x12a8a VADDPD %XMM6,%XMM14,%XMM11 |
0x12a8e AND $-0x4,%R10 |
0x12a92 ADD %R10,%RCX |
0x12a95 SUB %R10,%RSI |
0x12a98 CMP $0x1,%RSI |
0x12a9c JE 12ad7 |
0x12a9e ADD %R15,%R10 |
0x12aa1 MOV 0x38(%RSP),%R15 |
0x12aa6 VMOVAPD %XMM4,%XMM8 |
0x12aaa VMOVDQU (%R12,%R10,8),%XMM7 |
0x12ab0 VGATHERQPD %XMM8,(%R9,%XMM7,8),%XMM9 |
0x12ab6 VFNMADD132PD (%R15,%R10,8),%XMM11,%XMM9 |
0x12abc VUNPCKHPD %XMM9,%XMM9,%XMM11 |
0x12ac1 VADDPD %XMM9,%XMM11,%XMM10 |
0x12ac6 VADDSD %XMM1,%XMM10,%XMM12 |
0x12aca TEST $0x1,%SIL |
0x12ace JE 12aec |
0x12ad0 AND $-0x2,%RSI |
0x12ad4 ADD %RSI,%RCX |
0x12ad7 MOV (%R12,%RCX,8),%RSI |
0x12adb MOV 0x38(%RSP),%RDI |
0x12ae0 VMOVSD (%R9,%RSI,8),%XMM1 |
0x12ae6 VFNMADD231SD (%RDI,%RCX,8),%XMM1,%XMM12 |
0x12aec MOV 0x18(%RSP),%R13 |
0x12af1 MOV 0x8(%RSP),%RCX |
0x12af6 VMULSD %XMM12,%XMM5,%XMM6 |
0x12afb VDIVSD (%RCX,%RDX,8),%XMM6,%XMM14 |
0x12b00 VADDSD (%R13,%RDX,8),%XMM14,%XMM13 |
0x12b07 VMOVSD %XMM13,(%R13,%RDX,8) |
0x12b0e INC %RDX |
0x12b11 CMP %RDX,0x28(%RSP) |
0x12b16 JNE 125a6 |
0x12b40 VMOVDQU (%RSI),%YMM11 |
0x12b44 VMOVAPD %YMM2,%YMM8 |
0x12b48 MOV $0x20,%ECX |
0x12b4d VGATHERQPD %YMM8,(%R8,%YMM11,8),%YMM1 |
0x12b53 VFNMADD231PD (%RDI),%YMM1,%YMM0 |
0x12b58 JMP 1264c |
0x12b60 VMOVDQU (%R13),%YMM10 |
0x12b66 VMOVAPD %YMM2,%YMM9 |
0x12b6a MOV $0x20,%EAX |
0x12b6f VGATHERQPD %YMM9,(%R9,%YMM10,8),%YMM0 |
0x12b75 VFNMADD231PD (%RDI),%YMM0,%YMM6 |
0x12b7a JMP 128d8 |
0x12b80 VMOVSD %XMM1,%XMM1,%XMM12 |
0x12b84 JMP 12aec |
0x12b90 VMOVSD %XMM6,%XMM6,%XMM1 |
0x12b94 JMP 12843 |
0x12b99 VMOVSD %XMM1,%XMM1,%XMM12 |
0x12b9d VXORPD %XMM11,%XMM11,%XMM11 |
0x12ba2 XOR %R10D,%R10D |
0x12ba5 JMP 12a95 |
0x12baa VMOVSD %XMM6,%XMM6,%XMM1 |
0x12bae VXORPD %XMM7,%XMM7,%XMM7 |
0x12bb2 XOR %ECX,%ECX |
0x12bb4 JMP 127f7 |
/home/eoseret/qaas_runs_CPU_9468/172-019-1763/intel/AMG/build/AMG/AMG/parcsr_ls/ams.c: 3662 - 3682 |
-------------------------------------------------------------------------------- |
3662: #pragma omp parallel for private(i,ii,jj,res) HYPRE_SMP_SCHEDULE |
[...] |
3669: if (A_diag_data[A_diag_i[i]] != zero) |
3670: { |
3671: res = f_data[i]; |
3672: for (jj = A_diag_i[i]; jj < A_diag_i[i+1]; jj++) |
3673: { |
3674: ii = A_diag_j[jj]; |
3675: res -= A_diag_data[jj] * Vtemp_data[ii]; |
3676: } |
3677: for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++) |
3678: { |
3679: ii = A_offd_j[jj]; |
3680: res -= A_offd_data[jj] * Vext_data[ii]; |
3681: } |
3682: u_data[i] += (relax_weight*res)/l1_norms[i]; |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
○95.81 | gomp_thread_start | team.c:130 | libgomp.so.1.0.0 |
○4.19 | GOMP_parallel | libgomp.h:985 | libgomp.so.1.0.0 |
Path / |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.21 |
CQA speedup if FP arith vectorized | 2.41 |
CQA speedup if fully vectorized | 4.39 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 2.41 |
Bottlenecks | micro-operation queue |
Function | hypre_ParCSRRelaxThreads._omp_fn.1 |
Source | ams.c:3662-3662,ams.c:3669-3672,ams.c:3675-3677,ams.c:3680-3682 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 98.83 |
CQA cycles if no scalar integer | 81.50 |
CQA cycles if FP arith vectorized | 41.04 |
CQA cycles if fully vectorized | 22.50 |
Front-end cycles | 98.83 |
DIV/SQRT cycles | 5.00 |
P0 cycles | 23.50 |
P1 cycles | 23.50 |
P2 cycles | 23.25 |
P3 cycles | 23.25 |
P4 cycles | 17.50 |
P5 cycles | 19.33 |
P6 cycles | 19.33 |
P7 cycles | 19.33 |
P8 cycles | 41.00 |
P9 cycles | 41.00 |
P10 cycles | 41.00 |
P11 cycles | 41.00 |
P12 cycles | 39.00 |
P13 cycles | 39.00 |
Inter-iter dependencies cycles | NA |
FE+BE cycles (UFS) | NA |
Stall cycles (UFS) | NA |
Nb insns | 236.00 |
Nb uops | 593.00 |
Nb loads | 73.00 |
Nb stores | 1.00 |
Nb stack references | 7.00 |
FLOP/cycle | 1.49 |
Nb FLOP add-sub | 21.00 |
Nb FLOP mul | 1.00 |
Nb FLOP fma | 62.00 |
Nb FLOP div | 1.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 16.67 |
Bytes prefetched | 0.00 |
Bytes loaded | 1640.00 |
Bytes stored | 8.00 |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | 64.46 |
Vectorization ratio load | 84.21 |
Vectorization ratio store | 0.00 |
Vectorization ratio mul | 0.00 |
Vectorization ratio add_sub | 61.54 |
Vectorization ratio fma | 88.89 |
Vectorization ratio div_sqrt | 0.00 |
Vectorization ratio other | 55.88 |
Vector-efficiency ratio all | 32.13 |
Vector-efficiency ratio load | 41.45 |
Vector-efficiency ratio store | 12.50 |
Vector-efficiency ratio mul | 12.50 |
Vector-efficiency ratio add_sub | 20.19 |
Vector-efficiency ratio fma | 43.06 |
Vector-efficiency ratio div_sqrt | 12.50 |
Vector-efficiency ratio other | 29.78 |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.21 |
CQA speedup if FP arith vectorized | 2.41 |
CQA speedup if fully vectorized | 4.39 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 2.41 |
Bottlenecks | micro-operation queue |
Function | hypre_ParCSRRelaxThreads._omp_fn.1 |
Source | ams.c:3662-3662,ams.c:3669-3672,ams.c:3675-3677,ams.c:3680-3682 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 98.83 |
CQA cycles if no scalar integer | 81.50 |
CQA cycles if FP arith vectorized | 41.04 |
CQA cycles if fully vectorized | 22.50 |
Front-end cycles | 98.83 |
DIV/SQRT cycles | 5.00 |
P0 cycles | 23.50 |
P1 cycles | 23.50 |
P2 cycles | 23.25 |
P3 cycles | 23.25 |
P4 cycles | 17.50 |
P5 cycles | 19.33 |
P6 cycles | 19.33 |
P7 cycles | 19.33 |
P8 cycles | 41.00 |
P9 cycles | 41.00 |
P10 cycles | 41.00 |
P11 cycles | 41.00 |
P12 cycles | 39.00 |
P13 cycles | 39.00 |
Inter-iter dependencies cycles | NA |
FE+BE cycles (UFS) | NA |
Stall cycles (UFS) | NA |
Nb insns | 236.00 |
Nb uops | 593.00 |
Nb loads | 73.00 |
Nb stores | 1.00 |
Nb stack references | 7.00 |
FLOP/cycle | 1.49 |
Nb FLOP add-sub | 21.00 |
Nb FLOP mul | 1.00 |
Nb FLOP fma | 62.00 |
Nb FLOP div | 1.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 16.67 |
Bytes prefetched | 0.00 |
Bytes loaded | 1640.00 |
Bytes stored | 8.00 |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | 64.46 |
Vectorization ratio load | 84.21 |
Vectorization ratio store | 0.00 |
Vectorization ratio mul | 0.00 |
Vectorization ratio add_sub | 61.54 |
Vectorization ratio fma | 88.89 |
Vectorization ratio div_sqrt | 0.00 |
Vectorization ratio other | 55.88 |
Vector-efficiency ratio all | 32.13 |
Vector-efficiency ratio load | 41.45 |
Vector-efficiency ratio store | 12.50 |
Vector-efficiency ratio mul | 12.50 |
Vector-efficiency ratio add_sub | 20.19 |
Vector-efficiency ratio fma | 43.06 |
Vector-efficiency ratio div_sqrt | 12.50 |
Vector-efficiency ratio other | 29.78 |
Path / |
Function | hypre_ParCSRRelaxThreads._omp_fn.1 |
Source file and lines | ams.c:3662-3682 |
Module | libparcsr_ls.so |
nb instructions | 236 |
nb uops | 593 |
loop length | 1070 |
used x86 registers | 15 |
used mmx registers | 0 |
used xmm registers | 15 |
used ymm registers | 13 |
used zmm registers | 0 |
nb stack references | 7 |
ADD-SUB / MUL ratio | 13.00 |
micro-operation queue | 98.83 cycles |
front end | 98.83 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 23.50 | 23.50 | 23.25 | 23.25 | 17.50 | 19.33 | 19.33 | 19.33 | 41.00 | 41.00 | 41.00 | 41.00 | 39.00 | 39.00 |
cycles | 23.50 | 23.50 | 23.25 | 23.25 | 17.50 | 19.33 | 19.33 | 19.33 | 41.00 | 41.00 | 41.00 | 41.00 | 39.00 | 39.00 |
Cycles executing div or sqrt instructions | 5.00 |
Front-end | 98.83 |
Dispatch | 41.00 |
DIV/SQRT | 5.00 |
Overall L1 | 98.83 |
all | 43% |
load | 94% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 73% |
load | 80% |
store | 0% |
mul | 0% |
add-sub | 61% |
fma | 88% |
div/sqrt | 0% |
other | 80% |
all | 64% |
load | 84% |
store | 0% |
mul | 0% |
add-sub | 61% |
fma | 88% |
div/sqrt | 0% |
other | 55% |
all | 27% |
load | 44% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 34% |
load | 40% |
store | 12% |
mul | 12% |
add-sub | 20% |
fma | 43% |
div/sqrt | 12% |
other | 37% |
all | 32% |
load | 41% |
store | 12% |
mul | 12% |
add-sub | 20% |
fma | 43% |
div/sqrt | 12% |
other | 29% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput | Vectorization |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
INC %RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
CMP %RDX,0x28(%RSP) | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
JE 12b1c <hypre_ParCSRRelaxThreads._omp_fn.1+0x63c> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV 0x30(%RSP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV (%RCX,%RDX,8),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
LEA (,%RAX,8),%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV %RAX,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
LEA (%R11,%RSI,1),%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VCOMISD (%RDI),%XMM3 | 2 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0.50 | 6 | 1 | scal (12.5%) |
JE 12598 <hypre_ParCSRRelaxThreads._omp_fn.1+0xb8> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV 0x20(%RSP),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x30(%RSP),%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VMOVSD (%R10,%RDX,8),%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
MOV 0x8(%R15,%RDX,8),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
CMP %R10,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JGE 12b90 <hypre_ParCSRRelaxThreads._omp_fn.1+0x6b0> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
SUB %RAX,%R10 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
LEA -0x1(%R10),%R13 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
CMP $0x2,%R13 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JBE 12baa <hypre_ParCSRRelaxThreads._omp_fn.1+0x6ca> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV %R10,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
ADD %RBX,%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
SHR $0x2,%R15 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
SAL $0x5,%R15 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
LEA -0x20(%R15),%R13 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
SHR $0x5,%R13 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
INC %R13 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
AND $0x7,%R13D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
JE 126eb <hypre_ParCSRRelaxThreads._omp_fn.1+0x20b> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
CMP $0x1,%R13 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JE 126c9 <hypre_ParCSRRelaxThreads._omp_fn.1+0x1e9> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
CMP $0x2,%R13 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JE 126b0 <hypre_ParCSRRelaxThreads._omp_fn.1+0x1d0> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
CMP $0x3,%R13 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JE 12697 <hypre_ParCSRRelaxThreads._omp_fn.1+0x1b7> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
CMP $0x4,%R13 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JE 1267e <hypre_ParCSRRelaxThreads._omp_fn.1+0x19e> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
CMP $0x5,%R13 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JE 12665 <hypre_ParCSRRelaxThreads._omp_fn.1+0x185> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
CMP $0x6,%R13 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JNE 12b40 <hypre_ParCSRRelaxThreads._omp_fn.1+0x660> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
VMOVDQU (%RSI,%RCX,1),%YMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
VMOVAPD %YMM2,%YMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (50.0%) |
VGATHERQPD %YMM10,(%R8,%YMM9,8),%YMM12 | 24 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 3 | 1.50 | 1.50 | 2.50 | 2.50 | 0-16 | 4 | vect (50.0%) |
VFNMADD231PD (%RDI,%RCX,1),%YMM12,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (50.0%) |
ADD $0x20,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VMOVDQU (%RSI,%RCX,1),%YMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
VMOVAPD %YMM2,%YMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (50.0%) |
VGATHERQPD %YMM14,(%R8,%YMM13,8),%YMM15 | 24 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 3 | 1.50 | 1.50 | 2.50 | 2.50 | 0-16 | 4 | vect (50.0%) |
VFNMADD231PD (%RDI,%RCX,1),%YMM15,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (50.0%) |
ADD $0x20,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VMOVDQU (%RSI,%RCX,1),%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
VMOVAPD %YMM2,%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (50.0%) |
VGATHERQPD %YMM7,(%R8,%YMM8,8),%YMM11 | 24 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 3 | 1.50 | 1.50 | 2.50 | 2.50 | 0-16 | 4 | vect (50.0%) |
VFNMADD231PD (%RDI,%RCX,1),%YMM11,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (50.0%) |
ADD $0x20,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VMOVDQU (%RSI,%RCX,1),%YMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
VMOVAPD %YMM2,%YMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (50.0%) |
VGATHERQPD %YMM10,(%R8,%YMM9,8),%YMM1 | 24 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 3 | 1.50 | 1.50 | 2.50 | 2.50 | 0-16 | 4 | vect (50.0%) |
VFNMADD231PD (%RDI,%RCX,1),%YMM1,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (50.0%) |
ADD $0x20,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VMOVDQU (%RSI,%RCX,1),%YMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
VMOVAPD %YMM2,%YMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (50.0%) |
VGATHERQPD %YMM12,(%R8,%YMM14,8),%YMM13 | 24 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 3 | 1.50 | 1.50 | 2.50 | 2.50 | 0-16 | 4 | vect (50.0%) |
VFNMADD231PD (%RDI,%RCX,1),%YMM13,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (50.0%) |
ADD $0x20,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VMOVDQU (%RSI,%RCX,1),%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
VMOVAPD %YMM2,%YMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (50.0%) |
VGATHERQPD %YMM15,(%R8,%YMM7,8),%YMM8 | 24 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 3 | 1.50 | 1.50 | 2.50 | 2.50 | 0-16 | 4 | vect (50.0%) |
VFNMADD231PD (%RDI,%RCX,1),%YMM8,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (50.0%) |
ADD $0x20,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
CMP %RCX,%R15 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JE 127c9 <hypre_ParCSRRelaxThreads._omp_fn.1+0x2e9> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
VEXTRACTF128 $0x1,%YMM0,%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | vect (25.0%) |
VADDPD %XMM0,%XMM9,%XMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VUNPCKHPD %XMM10,%XMM10,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
VADDPD %XMM10,%XMM1,%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VADDSD %XMM12,%XMM6,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
TEST $0x3,%R10B | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
JE 12843 <hypre_ParCSRRelaxThreads._omp_fn.1+0x363> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV %R10,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
VADDPD %XMM9,%XMM0,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
AND $-0x4,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
ADD %RCX,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
SUB %RCX,%R10 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
CMP $0x1,%R10 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JE 12833 <hypre_ParCSRRelaxThreads._omp_fn.1+0x353> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
ADD %R14,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VMOVAPD %XMM4,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
VMOVDQU (%RBX,%RCX,8),%XMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VGATHERQPD %XMM14,(%R8,%XMM15,8),%XMM13 | 18 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1.25 | 1.58 | 0.58 | 0.58 | 1.50 | 1.50 | 0-16 | 3 | vect (25.0%) |
VFNMADD132PD (%R11,%RCX,8),%XMM7,%XMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (25.0%) |
VUNPCKHPD %XMM13,%XMM13,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
VADDPD %XMM13,%XMM8,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VADDSD %XMM7,%XMM6,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
TEST $0x1,%R10B | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
JE 12843 <hypre_ParCSRRelaxThreads._omp_fn.1+0x363> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
AND $-0x2,%R10 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
ADD %R10,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV (%RBX,%RAX,8),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VMOVSD (%R11,%RAX,8),%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
VFNMADD231SD (%R8,%R14,8),%XMM6,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV 0x10(%RSP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV (%RAX,%RDX,8),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x8(%RAX,%RDX,8),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
CMP %RSI,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JGE 12b80 <hypre_ParCSRRelaxThreads._omp_fn.1+0x6a0> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
SUB %RCX,%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV %RCX,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
LEA -0x1(%RSI),%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
CMP $0x2,%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JBE 12b99 <hypre_ParCSRRelaxThreads._omp_fn.1+0x6b9> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV 0x38(%RSP),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
LEA (,%RCX,8),%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
VXORPD %XMM6,%XMM6,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
LEA (%R12,%RDI,1),%R13 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
ADD %R14,%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV %RSI,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
SHR $0x2,%R14 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
SAL $0x5,%R14 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
LEA -0x20(%R14),%R10 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
SHR $0x5,%R10 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
INC %R10 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
AND $0x7,%R10D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
JE 12983 <hypre_ParCSRRelaxThreads._omp_fn.1+0x4a3> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
CMP $0x1,%R10 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JE 1295f <hypre_ParCSRRelaxThreads._omp_fn.1+0x47f> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
CMP $0x2,%R10 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JE 12944 <hypre_ParCSRRelaxThreads._omp_fn.1+0x464> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
CMP $0x3,%R10 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JE 12929 <hypre_ParCSRRelaxThreads._omp_fn.1+0x449> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
CMP $0x4,%R10 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JE 1290e <hypre_ParCSRRelaxThreads._omp_fn.1+0x42e> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
CMP $0x5,%R10 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JE 128f3 <hypre_ParCSRRelaxThreads._omp_fn.1+0x413> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
CMP $0x6,%R10 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JNE 12b60 <hypre_ParCSRRelaxThreads._omp_fn.1+0x680> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
VMOVDQU (%R13,%RAX,1),%YMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
VMOVAPD %YMM2,%YMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (50.0%) |
VGATHERQPD %YMM12,(%R9,%YMM14,8),%YMM15 | 24 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 3 | 1.50 | 1.50 | 2.50 | 2.50 | 0-16 | 4 | vect (50.0%) |
VFNMADD231PD (%RDI,%RAX,1),%YMM15,%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (50.0%) |
ADD $0x20,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VMOVDQU (%R13,%RAX,1),%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
VMOVAPD %YMM2,%YMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (50.0%) |
VGATHERQPD %YMM13,(%R9,%YMM8,8),%YMM7 | 24 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 3 | 1.50 | 1.50 | 2.50 | 2.50 | 0-16 | 4 | vect (50.0%) |
VFNMADD231PD (%RDI,%RAX,1),%YMM7,%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (50.0%) |
ADD $0x20,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VMOVDQU (%R13,%RAX,1),%YMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
VMOVAPD %YMM2,%YMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (50.0%) |
VGATHERQPD %YMM11,(%R9,%YMM9,8),%YMM10 | 24 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 3 | 1.50 | 1.50 | 2.50 | 2.50 | 0-16 | 4 | vect (50.0%) |
VFNMADD231PD (%RDI,%RAX,1),%YMM10,%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (50.0%) |
ADD $0x20,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VMOVDQU (%R13,%RAX,1),%YMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
VMOVAPD %YMM2,%YMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (50.0%) |
VGATHERQPD %YMM12,(%R9,%YMM14,8),%YMM0 | 24 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 3 | 1.50 | 1.50 | 2.50 | 2.50 | 0-16 | 4 | vect (50.0%) |
VFNMADD231PD (%RDI,%RAX,1),%YMM0,%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (50.0%) |
ADD $0x20,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VMOVDQU (%R13,%RAX,1),%YMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
VMOVAPD %YMM2,%YMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (50.0%) |
VGATHERQPD %YMM15,(%R9,%YMM13,8),%YMM8 | 24 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 3 | 1.50 | 1.50 | 2.50 | 2.50 | 0-16 | 4 | vect (50.0%) |
VFNMADD231PD (%RDI,%RAX,1),%YMM8,%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (50.0%) |
ADD $0x20,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VMOVDQU (%R13,%RAX,1),%YMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
VMOVAPD %YMM2,%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (50.0%) |
VGATHERQPD %YMM7,(%R9,%YMM11,8),%YMM9 | 24 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 3 | 1.50 | 1.50 | 2.50 | 2.50 | 0-16 | 4 | vect (50.0%) |
VFNMADD231PD (%RDI,%RAX,1),%YMM9,%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (50.0%) |
ADD $0x20,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
CMP %RAX,%R14 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JE 12a69 <hypre_ParCSRRelaxThreads._omp_fn.1+0x589> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
VEXTRACTF128 $0x1,%YMM6,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | vect (25.0%) |
VADDPD %XMM6,%XMM14,%XMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VUNPCKHPD %XMM13,%XMM13,%XMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
VADDPD %XMM13,%XMM15,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VADDSD %XMM0,%XMM1,%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
TEST $0x3,%SIL | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
JE 12aec <hypre_ParCSRRelaxThreads._omp_fn.1+0x60c> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV %RSI,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
VADDPD %XMM6,%XMM14,%XMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
AND $-0x4,%R10 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
ADD %R10,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
SUB %R10,%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
CMP $0x1,%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JE 12ad7 <hypre_ParCSRRelaxThreads._omp_fn.1+0x5f7> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
ADD %R15,%R10 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV 0x38(%RSP),%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VMOVAPD %XMM4,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
VMOVDQU (%R12,%R10,8),%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VGATHERQPD %XMM8,(%R9,%XMM7,8),%XMM9 | 18 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1.25 | 1.58 | 0.58 | 0.58 | 1.50 | 1.50 | 0-16 | 3 | vect (25.0%) |
VFNMADD132PD (%R15,%R10,8),%XMM11,%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (25.0%) |
VUNPCKHPD %XMM9,%XMM9,%XMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
VADDPD %XMM9,%XMM11,%XMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VADDSD %XMM1,%XMM10,%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
TEST $0x1,%SIL | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
JE 12aec <hypre_ParCSRRelaxThreads._omp_fn.1+0x60c> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
AND $-0x2,%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
ADD %RSI,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV (%R12,%RCX,8),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x38(%RSP),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VMOVSD (%R9,%RSI,8),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
VFNMADD231SD (%RDI,%RCX,8),%XMM1,%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV 0x18(%RSP),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x8(%RSP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VMULSD %XMM12,%XMM5,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
VDIVSD (%RCX,%RDX,8),%XMM6,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 13 | 5 | scal (12.5%) |
VADDSD (%R13,%RDX,8),%XMM14,%XMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
VMOVSD %XMM13,(%R13,%RDX,8) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 | scal (12.5%) |
INC %RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
CMP %RDX,0x28(%RSP) | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
JNE 125a6 <hypre_ParCSRRelaxThreads._omp_fn.1+0xc6> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
VMOVDQU (%RSI),%YMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
VMOVAPD %YMM2,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (50.0%) |
MOV $0x20,%ECX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VGATHERQPD %YMM8,(%R8,%YMM11,8),%YMM1 | 24 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 3 | 1.50 | 1.50 | 2.50 | 2.50 | 0-16 | 4 | vect (50.0%) |
VFNMADD231PD (%RDI),%YMM1,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (50.0%) |
JMP 1264c <hypre_ParCSRRelaxThreads._omp_fn.1+0x16c> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
VMOVDQU (%R13),%YMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
VMOVAPD %YMM2,%YMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (50.0%) |
MOV $0x20,%EAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VGATHERQPD %YMM9,(%R9,%YMM10,8),%YMM0 | 24 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 3 | 1.50 | 1.50 | 2.50 | 2.50 | 0-16 | 4 | vect (50.0%) |
VFNMADD231PD (%RDI),%YMM0,%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (50.0%) |
JMP 128d8 <hypre_ParCSRRelaxThreads._omp_fn.1+0x3f8> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
VMOVSD %XMM1,%XMM1,%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JMP 12aec <hypre_ParCSRRelaxThreads._omp_fn.1+0x60c> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
VMOVSD %XMM6,%XMM6,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JMP 12843 <hypre_ParCSRRelaxThreads._omp_fn.1+0x363> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
VMOVSD %XMM1,%XMM1,%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
VXORPD %XMM11,%XMM11,%XMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
XOR %R10D,%R10D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
JMP 12a95 <hypre_ParCSRRelaxThreads._omp_fn.1+0x5b5> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
VMOVSD %XMM6,%XMM6,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
VXORPD %XMM7,%XMM7,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
JMP 127f7 <hypre_ParCSRRelaxThreads._omp_fn.1+0x317> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
Function | hypre_ParCSRRelaxThreads._omp_fn.1 |
Source file and lines | ams.c:3662-3682 |
Module | libparcsr_ls.so |
nb instructions | 236 |
nb uops | 593 |
loop length | 1070 |
used x86 registers | 15 |
used mmx registers | 0 |
used xmm registers | 15 |
used ymm registers | 13 |
used zmm registers | 0 |
nb stack references | 7 |
ADD-SUB / MUL ratio | 13.00 |
micro-operation queue | 98.83 cycles |
front end | 98.83 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 23.50 | 23.50 | 23.25 | 23.25 | 17.50 | 19.33 | 19.33 | 19.33 | 41.00 | 41.00 | 41.00 | 41.00 | 39.00 | 39.00 |
cycles | 23.50 | 23.50 | 23.25 | 23.25 | 17.50 | 19.33 | 19.33 | 19.33 | 41.00 | 41.00 | 41.00 | 41.00 | 39.00 | 39.00 |
Cycles executing div or sqrt instructions | 5.00 |
Front-end | 98.83 |
Dispatch | 41.00 |
DIV/SQRT | 5.00 |
Overall L1 | 98.83 |
all | 43% |
load | 94% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 73% |
load | 80% |
store | 0% |
mul | 0% |
add-sub | 61% |
fma | 88% |
div/sqrt | 0% |
other | 80% |
all | 64% |
load | 84% |
store | 0% |
mul | 0% |
add-sub | 61% |
fma | 88% |
div/sqrt | 0% |
other | 55% |
all | 27% |
load | 44% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 34% |
load | 40% |
store | 12% |
mul | 12% |
add-sub | 20% |
fma | 43% |
div/sqrt | 12% |
other | 37% |
all | 32% |
load | 41% |
store | 12% |
mul | 12% |
add-sub | 20% |
fma | 43% |
div/sqrt | 12% |
other | 29% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput | Vectorization |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
INC %RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
CMP %RDX,0x28(%RSP) | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
JE 12b1c <hypre_ParCSRRelaxThreads._omp_fn.1+0x63c> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV 0x30(%RSP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV (%RCX,%RDX,8),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
LEA (,%RAX,8),%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV %RAX,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
LEA (%R11,%RSI,1),%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VCOMISD (%RDI),%XMM3 | 2 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0.50 | 6 | 1 | scal (12.5%) |
JE 12598 <hypre_ParCSRRelaxThreads._omp_fn.1+0xb8> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV 0x20(%RSP),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x30(%RSP),%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VMOVSD (%R10,%RDX,8),%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
MOV 0x8(%R15,%RDX,8),%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
CMP %R10,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JGE 12b90 <hypre_ParCSRRelaxThreads._omp_fn.1+0x6b0> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
SUB %RAX,%R10 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
LEA -0x1(%R10),%R13 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
CMP $0x2,%R13 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JBE 12baa <hypre_ParCSRRelaxThreads._omp_fn.1+0x6ca> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV %R10,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
ADD %RBX,%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
SHR $0x2,%R15 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
SAL $0x5,%R15 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
LEA -0x20(%R15),%R13 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
SHR $0x5,%R13 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
INC %R13 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
AND $0x7,%R13D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
JE 126eb <hypre_ParCSRRelaxThreads._omp_fn.1+0x20b> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
CMP $0x1,%R13 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JE 126c9 <hypre_ParCSRRelaxThreads._omp_fn.1+0x1e9> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
CMP $0x2,%R13 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JE 126b0 <hypre_ParCSRRelaxThreads._omp_fn.1+0x1d0> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
CMP $0x3,%R13 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JE 12697 <hypre_ParCSRRelaxThreads._omp_fn.1+0x1b7> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
CMP $0x4,%R13 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JE 1267e <hypre_ParCSRRelaxThreads._omp_fn.1+0x19e> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
CMP $0x5,%R13 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JE 12665 <hypre_ParCSRRelaxThreads._omp_fn.1+0x185> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
CMP $0x6,%R13 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JNE 12b40 <hypre_ParCSRRelaxThreads._omp_fn.1+0x660> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
VMOVDQU (%RSI,%RCX,1),%YMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
VMOVAPD %YMM2,%YMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (50.0%) |
VGATHERQPD %YMM10,(%R8,%YMM9,8),%YMM12 | 24 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 3 | 1.50 | 1.50 | 2.50 | 2.50 | 0-16 | 4 | vect (50.0%) |
VFNMADD231PD (%RDI,%RCX,1),%YMM12,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (50.0%) |
ADD $0x20,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VMOVDQU (%RSI,%RCX,1),%YMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
VMOVAPD %YMM2,%YMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (50.0%) |
VGATHERQPD %YMM14,(%R8,%YMM13,8),%YMM15 | 24 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 3 | 1.50 | 1.50 | 2.50 | 2.50 | 0-16 | 4 | vect (50.0%) |
VFNMADD231PD (%RDI,%RCX,1),%YMM15,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (50.0%) |
ADD $0x20,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VMOVDQU (%RSI,%RCX,1),%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
VMOVAPD %YMM2,%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (50.0%) |
VGATHERQPD %YMM7,(%R8,%YMM8,8),%YMM11 | 24 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 3 | 1.50 | 1.50 | 2.50 | 2.50 | 0-16 | 4 | vect (50.0%) |
VFNMADD231PD (%RDI,%RCX,1),%YMM11,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (50.0%) |
ADD $0x20,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VMOVDQU (%RSI,%RCX,1),%YMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
VMOVAPD %YMM2,%YMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (50.0%) |
VGATHERQPD %YMM10,(%R8,%YMM9,8),%YMM1 | 24 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 3 | 1.50 | 1.50 | 2.50 | 2.50 | 0-16 | 4 | vect (50.0%) |
VFNMADD231PD (%RDI,%RCX,1),%YMM1,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (50.0%) |
ADD $0x20,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VMOVDQU (%RSI,%RCX,1),%YMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
VMOVAPD %YMM2,%YMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (50.0%) |
VGATHERQPD %YMM12,(%R8,%YMM14,8),%YMM13 | 24 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 3 | 1.50 | 1.50 | 2.50 | 2.50 | 0-16 | 4 | vect (50.0%) |
VFNMADD231PD (%RDI,%RCX,1),%YMM13,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (50.0%) |
ADD $0x20,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VMOVDQU (%RSI,%RCX,1),%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
VMOVAPD %YMM2,%YMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (50.0%) |
VGATHERQPD %YMM15,(%R8,%YMM7,8),%YMM8 | 24 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 3 | 1.50 | 1.50 | 2.50 | 2.50 | 0-16 | 4 | vect (50.0%) |
VFNMADD231PD (%RDI,%RCX,1),%YMM8,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (50.0%) |
ADD $0x20,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
CMP %RCX,%R15 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JE 127c9 <hypre_ParCSRRelaxThreads._omp_fn.1+0x2e9> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
VEXTRACTF128 $0x1,%YMM0,%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | vect (25.0%) |
VADDPD %XMM0,%XMM9,%XMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VUNPCKHPD %XMM10,%XMM10,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
VADDPD %XMM10,%XMM1,%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VADDSD %XMM12,%XMM6,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
TEST $0x3,%R10B | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
JE 12843 <hypre_ParCSRRelaxThreads._omp_fn.1+0x363> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV %R10,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
VADDPD %XMM9,%XMM0,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
AND $-0x4,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
ADD %RCX,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
SUB %RCX,%R10 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
CMP $0x1,%R10 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JE 12833 <hypre_ParCSRRelaxThreads._omp_fn.1+0x353> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
ADD %R14,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VMOVAPD %XMM4,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
VMOVDQU (%RBX,%RCX,8),%XMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VGATHERQPD %XMM14,(%R8,%XMM15,8),%XMM13 | 18 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1.25 | 1.58 | 0.58 | 0.58 | 1.50 | 1.50 | 0-16 | 3 | vect (25.0%) |
VFNMADD132PD (%R11,%RCX,8),%XMM7,%XMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (25.0%) |
VUNPCKHPD %XMM13,%XMM13,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
VADDPD %XMM13,%XMM8,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VADDSD %XMM7,%XMM6,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
TEST $0x1,%R10B | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
JE 12843 <hypre_ParCSRRelaxThreads._omp_fn.1+0x363> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
AND $-0x2,%R10 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
ADD %R10,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV (%RBX,%RAX,8),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VMOVSD (%R11,%RAX,8),%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
VFNMADD231SD (%R8,%R14,8),%XMM6,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV 0x10(%RSP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV (%RAX,%RDX,8),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x8(%RAX,%RDX,8),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
CMP %RSI,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JGE 12b80 <hypre_ParCSRRelaxThreads._omp_fn.1+0x6a0> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
SUB %RCX,%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV %RCX,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
LEA -0x1(%RSI),%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
CMP $0x2,%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JBE 12b99 <hypre_ParCSRRelaxThreads._omp_fn.1+0x6b9> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV 0x38(%RSP),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
LEA (,%RCX,8),%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
VXORPD %XMM6,%XMM6,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
LEA (%R12,%RDI,1),%R13 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
ADD %R14,%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV %RSI,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
SHR $0x2,%R14 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
SAL $0x5,%R14 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
LEA -0x20(%R14),%R10 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
SHR $0x5,%R10 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
INC %R10 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
AND $0x7,%R10D | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
JE 12983 <hypre_ParCSRRelaxThreads._omp_fn.1+0x4a3> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
CMP $0x1,%R10 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JE 1295f <hypre_ParCSRRelaxThreads._omp_fn.1+0x47f> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
CMP $0x2,%R10 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JE 12944 <hypre_ParCSRRelaxThreads._omp_fn.1+0x464> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
CMP $0x3,%R10 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JE 12929 <hypre_ParCSRRelaxThreads._omp_fn.1+0x449> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
CMP $0x4,%R10 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JE 1290e <hypre_ParCSRRelaxThreads._omp_fn.1+0x42e> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
CMP $0x5,%R10 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JE 128f3 <hypre_ParCSRRelaxThreads._omp_fn.1+0x413> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
CMP $0x6,%R10 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JNE 12b60 <hypre_ParCSRRelaxThreads._omp_fn.1+0x680> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
VMOVDQU (%R13,%RAX,1),%YMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
VMOVAPD %YMM2,%YMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (50.0%) |
VGATHERQPD %YMM12,(%R9,%YMM14,8),%YMM15 | 24 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 3 | 1.50 | 1.50 | 2.50 | 2.50 | 0-16 | 4 | vect (50.0%) |
VFNMADD231PD (%RDI,%RAX,1),%YMM15,%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (50.0%) |
ADD $0x20,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VMOVDQU (%R13,%RAX,1),%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
VMOVAPD %YMM2,%YMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (50.0%) |
VGATHERQPD %YMM13,(%R9,%YMM8,8),%YMM7 | 24 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 3 | 1.50 | 1.50 | 2.50 | 2.50 | 0-16 | 4 | vect (50.0%) |
VFNMADD231PD (%RDI,%RAX,1),%YMM7,%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (50.0%) |
ADD $0x20,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VMOVDQU (%R13,%RAX,1),%YMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
VMOVAPD %YMM2,%YMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (50.0%) |
VGATHERQPD %YMM11,(%R9,%YMM9,8),%YMM10 | 24 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 3 | 1.50 | 1.50 | 2.50 | 2.50 | 0-16 | 4 | vect (50.0%) |
VFNMADD231PD (%RDI,%RAX,1),%YMM10,%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (50.0%) |
ADD $0x20,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VMOVDQU (%R13,%RAX,1),%YMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
VMOVAPD %YMM2,%YMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (50.0%) |
VGATHERQPD %YMM12,(%R9,%YMM14,8),%YMM0 | 24 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 3 | 1.50 | 1.50 | 2.50 | 2.50 | 0-16 | 4 | vect (50.0%) |
VFNMADD231PD (%RDI,%RAX,1),%YMM0,%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (50.0%) |
ADD $0x20,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VMOVDQU (%R13,%RAX,1),%YMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
VMOVAPD %YMM2,%YMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (50.0%) |
VGATHERQPD %YMM15,(%R9,%YMM13,8),%YMM8 | 24 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 3 | 1.50 | 1.50 | 2.50 | 2.50 | 0-16 | 4 | vect (50.0%) |
VFNMADD231PD (%RDI,%RAX,1),%YMM8,%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (50.0%) |
ADD $0x20,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VMOVDQU (%R13,%RAX,1),%YMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
VMOVAPD %YMM2,%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (50.0%) |
VGATHERQPD %YMM7,(%R9,%YMM11,8),%YMM9 | 24 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 3 | 1.50 | 1.50 | 2.50 | 2.50 | 0-16 | 4 | vect (50.0%) |
VFNMADD231PD (%RDI,%RAX,1),%YMM9,%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (50.0%) |
ADD $0x20,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
CMP %RAX,%R14 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JE 12a69 <hypre_ParCSRRelaxThreads._omp_fn.1+0x589> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
VEXTRACTF128 $0x1,%YMM6,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | vect (25.0%) |
VADDPD %XMM6,%XMM14,%XMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VUNPCKHPD %XMM13,%XMM13,%XMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
VADDPD %XMM13,%XMM15,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VADDSD %XMM0,%XMM1,%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
TEST $0x3,%SIL | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
JE 12aec <hypre_ParCSRRelaxThreads._omp_fn.1+0x60c> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV %RSI,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
VADDPD %XMM6,%XMM14,%XMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
AND $-0x4,%R10 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
ADD %R10,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
SUB %R10,%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
CMP $0x1,%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JE 12ad7 <hypre_ParCSRRelaxThreads._omp_fn.1+0x5f7> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
ADD %R15,%R10 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV 0x38(%RSP),%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VMOVAPD %XMM4,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
VMOVDQU (%R12,%R10,8),%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VGATHERQPD %XMM8,(%R9,%XMM7,8),%XMM9 | 18 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1.25 | 1.58 | 0.58 | 0.58 | 1.50 | 1.50 | 0-16 | 3 | vect (25.0%) |
VFNMADD132PD (%R15,%R10,8),%XMM11,%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (25.0%) |
VUNPCKHPD %XMM9,%XMM9,%XMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
VADDPD %XMM9,%XMM11,%XMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VADDSD %XMM1,%XMM10,%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
TEST $0x1,%SIL | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
JE 12aec <hypre_ParCSRRelaxThreads._omp_fn.1+0x60c> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
AND $-0x2,%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
ADD %RSI,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV (%R12,%RCX,8),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x38(%RSP),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VMOVSD (%R9,%RSI,8),%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
VFNMADD231SD (%RDI,%RCX,8),%XMM1,%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV 0x18(%RSP),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x8(%RSP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VMULSD %XMM12,%XMM5,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
VDIVSD (%RCX,%RDX,8),%XMM6,%XMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 13 | 5 | scal (12.5%) |
VADDSD (%R13,%RDX,8),%XMM14,%XMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
VMOVSD %XMM13,(%R13,%RDX,8) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 | scal (12.5%) |
INC %RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
CMP %RDX,0x28(%RSP) | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
JNE 125a6 <hypre_ParCSRRelaxThreads._omp_fn.1+0xc6> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
VMOVDQU (%RSI),%YMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
VMOVAPD %YMM2,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (50.0%) |
MOV $0x20,%ECX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VGATHERQPD %YMM8,(%R8,%YMM11,8),%YMM1 | 24 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 3 | 1.50 | 1.50 | 2.50 | 2.50 | 0-16 | 4 | vect (50.0%) |
VFNMADD231PD (%RDI),%YMM1,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (50.0%) |
JMP 1264c <hypre_ParCSRRelaxThreads._omp_fn.1+0x16c> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
VMOVDQU (%R13),%YMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 | vect (50.0%) |
VMOVAPD %YMM2,%YMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (50.0%) |
MOV $0x20,%EAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VGATHERQPD %YMM9,(%R9,%YMM10,8),%YMM0 | 24 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 3 | 1.50 | 1.50 | 2.50 | 2.50 | 0-16 | 4 | vect (50.0%) |
VFNMADD231PD (%RDI),%YMM0,%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 | vect (50.0%) |
JMP 128d8 <hypre_ParCSRRelaxThreads._omp_fn.1+0x3f8> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
VMOVSD %XMM1,%XMM1,%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JMP 12aec <hypre_ParCSRRelaxThreads._omp_fn.1+0x60c> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
VMOVSD %XMM6,%XMM6,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JMP 12843 <hypre_ParCSRRelaxThreads._omp_fn.1+0x363> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
VMOVSD %XMM1,%XMM1,%XMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
VXORPD %XMM11,%XMM11,%XMM11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
XOR %R10D,%R10D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
JMP 12a95 <hypre_ParCSRRelaxThreads._omp_fn.1+0x5b5> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
VMOVSD %XMM6,%XMM6,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
VXORPD %XMM7,%XMM7,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
JMP 127f7 <hypre_ParCSRRelaxThreads._omp_fn.1+0x317> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |