Loop Id: 1794 | Module: exec | Source: par_lr_interp.c:1221-1675 [...] | Coverage: 0.05% |
---|
Loop Id: 1794 | Module: exec | Source: par_lr_interp.c:1221-1675 [...] | Coverage: 0.05% |
---|
0x470ba0 MOV 0x38(%RSP),%RCX |
0x470ba5 MOV 0x40(%RSP),%RDX |
0x470baa MOV (%RCX),%RCX |
0x470bad VMOVSD (%RCX,%RAX,8),%XMM21 |
0x470bb4 VADDSD (%RDX,%R14,8),%XMM21,%XMM21 |
0x470bbb VMOVSD %XMM21,(%RCX,%RAX,8) |
0x470bc2 INC %R14 |
0x470bc5 CMP %R13,%R14 |
0x470bc8 JE 471876 |
0x470bce MOV 0xc8(%RSP),%RAX |
0x470bd6 MOV (%RAX,%R14,8),%RCX |
0x470bda MOV (%RBX,%RCX,8),%RAX |
0x470bde CMP %R10,%RAX |
0x470be1 JGE 470ba0 |
0x470be3 CMP 0x70(%RSP),%RAX |
0x470be8 JNE 470d30 |
0x470bee MOV 0x210(%RSP),%RDX |
0x470bf6 MOV 0x40(%RSP),%RSI |
0x470bfb VXORPD %XMM21,%XMM21,%XMM21 |
0x470c01 XOR %EAX,%EAX |
0x470c03 MOV (%RDX,%RCX,8),%RDI |
0x470c07 MOV 0x8(%RDX,%RCX,8),%RDX |
0x470c0c VUCOMISD (%RSI,%RDI,8),%XMM21 |
0x470c13 MOV %RDX,0x68(%RSP) |
0x470c18 SETBE %AL |
0x470c1b LEA -0x1(%RAX,%RAX,1),%R9 |
0x470c20 LEA 0x1(%RDI),%RAX |
0x470c24 CMP %RDX,%RAX |
0x470c27 JGE 470e73 |
0x470c2d MOV %RDI,%RSI |
0x470c30 NOT %RSI |
0x470c33 VCVTSI2SD %R9,%XMM26,%XMM21 |
0x470c39 ADD %RDX,%RSI |
0x470c3c MOV %RSI,%RDX |
0x470c3f AND $-0x8,%RDX |
0x470c43 JE 470d75 |
0x470c49 MOV %R9,0xd0(%RSP) |
0x470c51 MOV %RDI,%R15 |
0x470c54 VBROADCASTSD %XMM21,%ZMM22 |
0x470c5a LEA -0x1(%RDX),%RDI |
0x470c5e VXORPD %XMM21,%XMM21,%XMM21 |
0x470c64 MOV 0x128(%RSP),%R9 |
0x470c6c LEA (%R9,%R15,8),%R11 |
0x470c70 MOV 0x170(%RSP),%R9 |
0x470c78 MOV %R15,0xf8(%RSP) |
0x470c80 LEA (%R9,%R15,8),%R9 |
0x470c84 XOR %R15D,%R15D |
0x470c87 NOPW (%RAX,%RAX,1) |
(1800) 0x470c90 VMOVDQU64 (%R9,%R15,8),%ZMM23 |
(1800) 0x470c97 VPXORD %XMM24,%XMM24,%XMM24 |
(1800) 0x470c9d VPCMPEQQ %ZMM18,%ZMM23,%K0 |
(1800) 0x470ca3 KXNORW %K0,%K0,%K1 |
(1800) 0x470ca7 VPGATHERQQ (%RBX,%ZMM23,8),%ZMM24{%K1} |
(1800) 0x470cae VPCMPNLTQ %ZMM19,%ZMM24,%K1 |
(1800) 0x470cb5 KORB %K0,%K1,%K1 |
(1800) 0x470cb9 VMOVUPD (%R11,%R15,8),%ZMM23{%K1}{z} |
(1800) 0x470cc0 ADD $0x8,%R15 |
(1800) 0x470cc4 VMULPD %ZMM22,%ZMM23,%ZMM24 |
(1800) 0x470cca VCMPPD $0x1,%ZMM4,%ZMM24,%K1{%K1} |
(1800) 0x470cd1 VADDPD %ZMM23,%ZMM21,%ZMM21{%K1} |
(1800) 0x470cd7 CMP %RDI,%R15 |
(1800) 0x470cda JBE 470c90 |
0x470cdc VEXTRACTF64X4 $0x1,%ZMM21,%YMM23 |
0x470ce3 VADDPD %ZMM23,%ZMM21,%ZMM21 |
0x470ce9 VEXTRACTF32X4 $0x1,%YMM21,%XMM23 |
0x470cf0 VADDPD %XMM23,%XMM21,%XMM21 |
0x470cf6 VPERMILPD $0x1,%XMM21,%XMM23 |
0x470cfd VADDSD %XMM23,%XMM21,%XMM21 |
0x470d03 CMP %RDX,%RSI |
0x470d06 JNE 470d8b |
0x470d0c MOV 0x98(%RSP),%R11 |
0x470d14 MOV 0x30(%RSP),%R15 |
0x470d19 MOV 0xd0(%RSP),%R9 |
0x470d21 MOV 0xf8(%RSP),%RDI |
0x470d29 JMP 470e73 |
0x470d30 MOV 0xa8(%RSP),%RAX |
0x470d38 CMPQ $-0x3,(%RAX,%RCX,8) |
0x470d3d JE 470bc2 |
0x470d43 CMPQ $0x1,0x208(%RSP) |
0x470d4c JE 470d64 |
0x470d4e MOV 0x200(%RSP),%RDX |
0x470d56 MOV (%RDX,%R12,8),%RAX |
0x470d5a CMP (%RDX,%RCX,8),%RAX |
0x470d5e JNE 470bc2 |
0x470d64 MOV 0x40(%RSP),%RAX |
0x470d69 VADDSD (%RAX,%R14,8),%XMM17,%XMM17 |
0x470d70 JMP 470bc2 |
0x470d75 VBROADCASTSD %XMM21,%ZMM22 |
0x470d7b VPBROADCASTQ %RSI,%ZMM23 |
0x470d81 VXORPD %XMM21,%XMM21,%XMM21 |
0x470d87 XOR %EDX,%EDX |
0x470d89 JMP 470dae |
0x470d8b MOV 0x98(%RSP),%R11 |
0x470d93 MOV 0x30(%RSP),%R15 |
0x470d98 MOV 0xd0(%RSP),%R9 |
0x470da0 MOV 0xf8(%RSP),%RDI |
0x470da8 VPBROADCASTQ %RSI,%ZMM23 |
0x470dae MOV 0xc8(%RSP),%RSI |
0x470db6 VPBROADCASTQ %RDX,%ZMM24 |
0x470dbc ADD %RAX,%RDX |
0x470dbf VPSUBQ %ZMM24,%ZMM23,%ZMM23 |
0x470dc5 VPXORD %XMM24,%XMM24,%XMM24 |
0x470dcb VPCMPNLEUQ %ZMM3,%ZMM23,%K1 |
0x470dd2 KMOVQ %K1,%K2 |
0x470dd7 VMOVDQU64 (%RSI,%RDX,8),%ZMM23{%K1}{z} |
0x470dde MOV 0x40(%RSP),%RSI |
0x470de3 VMOVDQA64 %ZMM23,%ZMM12{%K1} |
0x470de9 VPSLLQ $0x3,%ZMM12,%ZMM23 |
0x470df0 VPADDQ %ZMM23,%ZMM0,%ZMM23 |
0x470df6 VPGATHERQQ (,%ZMM23,1),%ZMM24{%K2} |
0x470e01 VPCMPEQQ %ZMM18,%ZMM12,%K2{%K1} |
0x470e07 VMOVDQA64 %ZMM24,%ZMM10{%K1} |
0x470e0d VPCMPNLTQ %ZMM19,%ZMM10,%K0 |
0x470e14 KANDNB %K2,%K0,%K2 |
0x470e18 KANDB %K0,%K1,%K0 |
0x470e1c KORB %K2,%K0,%K1 |
0x470e20 VMOVUPD (%RSI,%RDX,8),%ZMM23{%K1}{z} |
0x470e27 VMOVAPD %ZMM23,%ZMM9{%K1} |
0x470e2d VMULPD %ZMM22,%ZMM9,%ZMM22 |
0x470e33 VCMPPD $0x1,%ZMM4,%ZMM22,%K2 |
0x470e3a VBLENDMPD %ZMM9,%ZMM2,%ZMM22{%K2} |
0x470e40 VMOVAPD %ZMM22,%ZMM22{%K1}{z} |
0x470e46 VEXTRACTF64X4 $0x1,%ZMM22,%YMM23 |
0x470e4d VADDPD %ZMM23,%ZMM22,%ZMM22 |
0x470e53 VEXTRACTF32X4 $0x1,%YMM22,%XMM23 |
0x470e5a VADDPD %XMM23,%XMM22,%XMM22 |
0x470e60 VPERMILPD $0x1,%XMM22,%XMM23 |
0x470e67 VADDSD %XMM23,%XMM22,%XMM22 |
0x470e6d VADDSD %XMM22,%XMM21,%XMM21 |
0x470e73 MOV 0x90(%RSP),%RDX |
0x470e7b MOV (%RDX),%RDX |
0x470e7e MOV %RDX,0x120(%RSP) |
0x470e86 CMP $0x2,%RDX |
0x470e8a JL 470fe9 |
0x470e90 MOV 0x178(%RSP),%RSI |
0x470e98 MOV %R13,%R15 |
0x470e9b MOV %R9,%R13 |
0x470e9e MOV (%RSI,%RCX,8),%R9 |
0x470ea2 MOV 0x8(%RSI,%RCX,8),%RDX |
0x470ea7 SUB %R9,%RDX |
0x470eaa JLE 470fde |
0x470eb0 VCVTSI2SD %R13,%XMM26,%XMM22 |
0x470eb6 MOV %RDX,%RSI |
0x470eb9 AND $-0x8,%RSI |
0x470ebd MOV %R9,0xe0(%RSP) |
0x470ec5 MOV %R13,%R9 |
0x470ec8 JE 4715ab |
0x470ece MOV %RDI,0xf8(%RSP) |
0x470ed6 MOV %R9,0xd0(%RSP) |
0x470ede MOV %RSI,0x168(%RSP) |
0x470ee6 LEA -0x1(%RSI),%R11 |
0x470eea VBROADCASTSD %XMM22,%ZMM22 |
0x470ef0 VXORPD %XMM23,%XMM23,%XMM23 |
0x470ef6 XOR %R13D,%R13D |
0x470ef9 MOV 0xd8(%RSP),%R9 |
0x470f01 MOV 0xe0(%RSP),%RSI |
0x470f09 MOV 0x108(%RSP),%R15 |
0x470f11 MOV 0x20(%RSP),%RDI |
0x470f16 LEA (%R9,%RSI,8),%R9 |
0x470f1a LEA (%R15,%RSI,8),%R15 |
0x470f1e XCHG %AX,%AX |
(1799) 0x470f20 VMOVDQU64 (%R15,%R13,8),%ZMM24 |
(1799) 0x470f27 KXNORW %K0,%K0,%K1 |
(1799) 0x470f2b VPXORD %XMM25,%XMM25,%XMM25 |
(1799) 0x470f31 VPGATHERQQ (%RDI,%ZMM24,8),%ZMM25{%K1} |
(1799) 0x470f38 VPCMPNLTQ %ZMM20,%ZMM25,%K1 |
(1799) 0x470f3f VMOVUPD (%R9,%R13,8),%ZMM24{%K1}{z} |
(1799) 0x470f46 ADD $0x8,%R13 |
(1799) 0x470f4a VMULPD %ZMM22,%ZMM24,%ZMM25 |
(1799) 0x470f50 VCMPPD $0x1,%ZMM4,%ZMM25,%K1{%K1} |
(1799) 0x470f57 VADDPD %ZMM24,%ZMM23,%ZMM23{%K1} |
(1799) 0x470f5d CMP %R11,%R13 |
(1799) 0x470f60 JBE 470f20 |
0x470f62 VEXTRACTF64X4 $0x1,%ZMM23,%YMM24 |
0x470f69 MOV 0x168(%RSP),%RSI |
0x470f71 VADDPD %ZMM24,%ZMM23,%ZMM23 |
0x470f77 VEXTRACTF32X4 $0x1,%YMM23,%XMM24 |
0x470f7e VADDPD %XMM24,%XMM23,%XMM23 |
0x470f84 VPERMILPD $0x1,%XMM23,%XMM24 |
0x470f8b VADDSD %XMM24,%XMM23,%XMM23 |
0x470f91 VADDSD %XMM23,%XMM21,%XMM21 |
0x470f97 CMP %RSI,%RDX |
0x470f9a JNE 4715cb |
0x470fa0 MOV 0x98(%RSP),%R11 |
0x470fa8 MOV 0x30(%RSP),%R15 |
0x470fad MOV 0xf0(%RSP),%R13 |
0x470fb5 MOV 0xd0(%RSP),%R9 |
0x470fbd MOV 0xf8(%RSP),%RDI |
0x470fc5 MOV 0x40(%RSP),%RDX |
0x470fca VUCOMISD %XMM5,%XMM21 |
0x470fd0 VMOVSD (%RDX,%R14,8),%XMM22 |
0x470fd7 JNE 471001 |
0x470fd9 JMP 4716d2 |
0x470fde MOV %R13,%R9 |
0x470fe1 MOV %R15,%R13 |
0x470fe4 MOV 0x30(%RSP),%R15 |
0x470fe9 MOV 0x40(%RSP),%RDX |
0x470fee VUCOMISD %XMM5,%XMM21 |
0x470ff4 VMOVSD (%RDX,%R14,8),%XMM22 |
0x470ffb JE 4716d2 |
0x471001 VDIVSD %XMM21,%XMM22,%XMM21 |
0x471007 MOV 0x68(%RSP),%RDX |
0x47100c CMP %RDX,%RAX |
0x47100f JGE 471044 |
0x471011 MOV %EDI,%ESI |
0x471013 NOT %ESI |
0x471015 VCVTSI2SD %R9,%XMM26,%XMM22 |
0x47101b MOV %R9,0xd0(%RSP) |
0x471023 ADD %EDX,%ESI |
0x471025 SUB %RDI,%RDX |
0x471028 ADD $-0x2,%RDX |
0x47102c AND $0x7,%RSI |
0x471030 JNE 4710a8 |
0x471032 MOV 0xd0(%RSP),%R9 |
0x47103a CMP $0x7,%RDX |
0x47103e JAE 471152 |
0x471044 CMPQ $0x2,0x120(%RSP) |
0x47104d JL 470bc2 |
0x471053 MOV 0x178(%RSP),%RDX |
0x47105b MOV (%RDX,%RCX,8),%RAX |
0x47105f MOV 0x8(%RDX,%RCX,8),%RCX |
0x471064 MOV %RCX,%RSI |
0x471067 SUB %RAX,%RSI |
0x47106a JLE 470bc2 |
0x471070 VCVTSI2SD %R9,%XMM26,%XMM22 |
0x471076 CMP $0x4,%RSI |
0x47107a JAE 4716dd |
0x471080 MOV %RSI,%RDX |
0x471083 AND $-0x4,%RDX |
0x471087 CMP %RSI,%RDX |
0x47108a JAE 471869 |
0x471090 MOV 0xf0(%RSP),%R13 |
0x471098 ADD %RDX,%RAX |
0x47109b JMP 47155c |
(1798) 0x4710a0 INC %RAX |
(1798) 0x4710a3 DEC %RSI |
(1798) 0x4710a6 JE 471032 |
(1798) 0x4710a8 MOV 0xc8(%RSP),%RDI |
(1798) 0x4710b0 MOV (%RDI,%RAX,8),%RDI |
(1798) 0x4710b4 MOV (%RBX,%RDI,8),%R9 |
(1798) 0x4710b8 CMP %R10,%R9 |
(1798) 0x4710bb JL 4710fe |
(1798) 0x4710bd MOV 0x40(%RSP),%R13 |
(1798) 0x4710c2 VMOVSD (%R13,%RAX,8),%XMM23 |
(1798) 0x4710ca MOV 0xf0(%RSP),%R13 |
(1798) 0x4710d2 VMULSD %XMM22,%XMM23,%XMM24 |
(1798) 0x4710d8 VUCOMISD %XMM5,%XMM24 |
(1798) 0x4710de JAE 4710fe |
(1798) 0x4710e0 MOV 0x38(%RSP),%R11 |
(1798) 0x4710e5 MOV (%R11),%R11 |
(1798) 0x4710e8 VFMADD213SD (%R11,%R9,8),%XMM21,%XMM23 |
(1798) 0x4710ef VMOVSD %XMM23,(%R11,%R9,8) |
(1798) 0x4710f6 MOV 0x98(%RSP),%R11 |
(1798) 0x4710fe CMP %R12,%RDI |
(1798) 0x471101 JNE 4710a0 |
(1798) 0x471103 MOV 0x40(%RSP),%RDI |
(1798) 0x471108 VMOVSD (%RDI,%RAX,8),%XMM23 |
(1798) 0x47110f VMULSD %XMM22,%XMM23,%XMM24 |
(1798) 0x471115 VMULSD %XMM21,%XMM23,%XMM23 |
(1798) 0x47111b VCMPSD $0x1,%XMM5,%XMM24,%K1 |
(1798) 0x471122 VMOVAPD %XMM6,%XMM24 |
(1798) 0x471128 VMOVSD %XMM23,%XMM24,%XMM24{%K1} |
(1798) 0x47112e VADDSD %XMM17,%XMM24,%XMM17 |
(1798) 0x471134 JMP 4710a0 |
(1797) 0x471140 MOV 0x68(%RSP),%RDX |
(1797) 0x471145 ADD $0x8,%RAX |
(1797) 0x471149 CMP %RAX,%RDX |
(1797) 0x47114c JE 471044 |
(1797) 0x471152 MOV 0xc8(%RSP),%RDX |
(1797) 0x47115a MOV (%RDX,%RAX,8),%RDX |
(1797) 0x47115e MOV (%RBX,%RDX,8),%RSI |
(1797) 0x471162 CMP %R10,%RSI |
(1797) 0x471165 JL 471197 |
(1797) 0x471167 MOV 0x40(%RSP),%RDI |
(1797) 0x47116c VMOVSD (%RDI,%RAX,8),%XMM23 |
(1797) 0x471173 VMULSD %XMM22,%XMM23,%XMM24 |
(1797) 0x471179 VUCOMISD %XMM5,%XMM24 |
(1797) 0x47117f JAE 471197 |
(1797) 0x471181 MOV 0x38(%RSP),%RDI |
(1797) 0x471186 MOV (%RDI),%RDI |
(1797) 0x471189 VFMADD213SD (%RDI,%RSI,8),%XMM21,%XMM23 |
(1797) 0x471190 VMOVSD %XMM23,(%RDI,%RSI,8) |
(1797) 0x471197 CMP %R12,%RDX |
(1797) 0x47119a JNE 4711cd |
(1797) 0x47119c MOV 0x40(%RSP),%RDX |
(1797) 0x4711a1 VMOVSD (%RDX,%RAX,8),%XMM23 |
(1797) 0x4711a8 VMULSD %XMM22,%XMM23,%XMM24 |
(1797) 0x4711ae VMULSD %XMM21,%XMM23,%XMM23 |
(1797) 0x4711b4 VCMPSD $0x1,%XMM5,%XMM24,%K1 |
(1797) 0x4711bb VMOVAPD %XMM6,%XMM24 |
(1797) 0x4711c1 VMOVSD %XMM23,%XMM24,%XMM24{%K1} |
(1797) 0x4711c7 VADDSD %XMM17,%XMM24,%XMM17 |
(1797) 0x4711cd MOV 0xc8(%RSP),%RDX |
(1797) 0x4711d5 MOV 0x8(%RDX,%RAX,8),%RDX |
(1797) 0x4711da MOV (%RBX,%RDX,8),%RSI |
(1797) 0x4711de CMP %R10,%RSI |
(1797) 0x4711e1 JL 471214 |
(1797) 0x4711e3 MOV 0x40(%RSP),%RDI |
(1797) 0x4711e8 VMOVSD 0x8(%RDI,%RAX,8),%XMM23 |
(1797) 0x4711f0 VMULSD %XMM22,%XMM23,%XMM24 |
(1797) 0x4711f6 VUCOMISD %XMM5,%XMM24 |
(1797) 0x4711fc JAE 471214 |
(1797) 0x4711fe MOV 0x38(%RSP),%RDI |
(1797) 0x471203 MOV (%RDI),%RDI |
(1797) 0x471206 VFMADD213SD (%RDI,%RSI,8),%XMM21,%XMM23 |
(1797) 0x47120d VMOVSD %XMM23,(%RDI,%RSI,8) |
(1797) 0x471214 CMP %R12,%RDX |
(1797) 0x471217 JNE 47124b |
(1797) 0x471219 MOV 0x40(%RSP),%RDX |
(1797) 0x47121e VMOVSD 0x8(%RDX,%RAX,8),%XMM23 |
(1797) 0x471226 VMULSD %XMM22,%XMM23,%XMM24 |
(1797) 0x47122c VMULSD %XMM21,%XMM23,%XMM23 |
(1797) 0x471232 VCMPSD $0x1,%XMM5,%XMM24,%K1 |
(1797) 0x471239 VMOVAPD %XMM6,%XMM24 |
(1797) 0x47123f VMOVSD %XMM23,%XMM24,%XMM24{%K1} |
(1797) 0x471245 VADDSD %XMM17,%XMM24,%XMM17 |
(1797) 0x47124b MOV 0xc8(%RSP),%RDX |
(1797) 0x471253 MOV 0x10(%RDX,%RAX,8),%RDX |
(1797) 0x471258 MOV (%RBX,%RDX,8),%RSI |
(1797) 0x47125c CMP %R10,%RSI |
(1797) 0x47125f JL 471292 |
(1797) 0x471261 MOV 0x40(%RSP),%RDI |
(1797) 0x471266 VMOVSD 0x10(%RDI,%RAX,8),%XMM23 |
(1797) 0x47126e VMULSD %XMM22,%XMM23,%XMM24 |
(1797) 0x471274 VUCOMISD %XMM5,%XMM24 |
(1797) 0x47127a JAE 471292 |
(1797) 0x47127c MOV 0x38(%RSP),%RDI |
(1797) 0x471281 MOV (%RDI),%RDI |
(1797) 0x471284 VFMADD213SD (%RDI,%RSI,8),%XMM21,%XMM23 |
(1797) 0x47128b VMOVSD %XMM23,(%RDI,%RSI,8) |
(1797) 0x471292 CMP %R12,%RDX |
(1797) 0x471295 JNE 4712c9 |
(1797) 0x471297 MOV 0x40(%RSP),%RDX |
(1797) 0x47129c VMOVSD 0x10(%RDX,%RAX,8),%XMM23 |
(1797) 0x4712a4 VMULSD %XMM22,%XMM23,%XMM24 |
(1797) 0x4712aa VMULSD %XMM21,%XMM23,%XMM23 |
(1797) 0x4712b0 VCMPSD $0x1,%XMM5,%XMM24,%K1 |
(1797) 0x4712b7 VMOVAPD %XMM6,%XMM24 |
(1797) 0x4712bd VMOVSD %XMM23,%XMM24,%XMM24{%K1} |
(1797) 0x4712c3 VADDSD %XMM17,%XMM24,%XMM17 |
(1797) 0x4712c9 MOV 0xc8(%RSP),%RDX |
(1797) 0x4712d1 MOV 0x18(%RDX,%RAX,8),%RDX |
(1797) 0x4712d6 MOV (%RBX,%RDX,8),%RSI |
(1797) 0x4712da CMP %R10,%RSI |
(1797) 0x4712dd JL 471310 |
(1797) 0x4712df MOV 0x40(%RSP),%RDI |
(1797) 0x4712e4 VMOVSD 0x18(%RDI,%RAX,8),%XMM23 |
(1797) 0x4712ec VMULSD %XMM22,%XMM23,%XMM24 |
(1797) 0x4712f2 VUCOMISD %XMM5,%XMM24 |
(1797) 0x4712f8 JAE 471310 |
(1797) 0x4712fa MOV 0x38(%RSP),%RDI |
(1797) 0x4712ff MOV (%RDI),%RDI |
(1797) 0x471302 VFMADD213SD (%RDI,%RSI,8),%XMM21,%XMM23 |
(1797) 0x471309 VMOVSD %XMM23,(%RDI,%RSI,8) |
(1797) 0x471310 CMP %R12,%RDX |
(1797) 0x471313 JNE 471347 |
(1797) 0x471315 MOV 0x40(%RSP),%RDX |
(1797) 0x47131a VMOVSD 0x18(%RDX,%RAX,8),%XMM23 |
(1797) 0x471322 VMULSD %XMM22,%XMM23,%XMM24 |
(1797) 0x471328 VMULSD %XMM21,%XMM23,%XMM23 |
(1797) 0x47132e VCMPSD $0x1,%XMM5,%XMM24,%K1 |
(1797) 0x471335 VMOVAPD %XMM6,%XMM24 |
(1797) 0x47133b VMOVSD %XMM23,%XMM24,%XMM24{%K1} |
(1797) 0x471341 VADDSD %XMM17,%XMM24,%XMM17 |
(1797) 0x471347 MOV 0xc8(%RSP),%RDX |
(1797) 0x47134f MOV 0x20(%RDX,%RAX,8),%RDX |
(1797) 0x471354 MOV (%RBX,%RDX,8),%RSI |
(1797) 0x471358 CMP %R10,%RSI |
(1797) 0x47135b JL 47138e |
(1797) 0x47135d MOV 0x40(%RSP),%RDI |
(1797) 0x471362 VMOVSD 0x20(%RDI,%RAX,8),%XMM23 |
(1797) 0x47136a VMULSD %XMM22,%XMM23,%XMM24 |
(1797) 0x471370 VUCOMISD %XMM5,%XMM24 |
(1797) 0x471376 JAE 47138e |
(1797) 0x471378 MOV 0x38(%RSP),%RDI |
(1797) 0x47137d MOV (%RDI),%RDI |
(1797) 0x471380 VFMADD213SD (%RDI,%RSI,8),%XMM21,%XMM23 |
(1797) 0x471387 VMOVSD %XMM23,(%RDI,%RSI,8) |
(1797) 0x47138e CMP %R12,%RDX |
(1797) 0x471391 JNE 4713c5 |
(1797) 0x471393 MOV 0x40(%RSP),%RDX |
(1797) 0x471398 VMOVSD 0x20(%RDX,%RAX,8),%XMM23 |
(1797) 0x4713a0 VMULSD %XMM22,%XMM23,%XMM24 |
(1797) 0x4713a6 VMULSD %XMM21,%XMM23,%XMM23 |
(1797) 0x4713ac VCMPSD $0x1,%XMM5,%XMM24,%K1 |
(1797) 0x4713b3 VMOVAPD %XMM6,%XMM24 |
(1797) 0x4713b9 VMOVSD %XMM23,%XMM24,%XMM24{%K1} |
(1797) 0x4713bf VADDSD %XMM17,%XMM24,%XMM17 |
(1797) 0x4713c5 MOV 0xc8(%RSP),%RDX |
(1797) 0x4713cd MOV 0x28(%RDX,%RAX,8),%RDX |
(1797) 0x4713d2 MOV (%RBX,%RDX,8),%RSI |
(1797) 0x4713d6 CMP %R10,%RSI |
(1797) 0x4713d9 JL 47140c |
(1797) 0x4713db MOV 0x40(%RSP),%RDI |
(1797) 0x4713e0 VMOVSD 0x28(%RDI,%RAX,8),%XMM23 |
(1797) 0x4713e8 VMULSD %XMM22,%XMM23,%XMM24 |
(1797) 0x4713ee VUCOMISD %XMM5,%XMM24 |
(1797) 0x4713f4 JAE 47140c |
(1797) 0x4713f6 MOV 0x38(%RSP),%RDI |
(1797) 0x4713fb MOV (%RDI),%RDI |
(1797) 0x4713fe VFMADD213SD (%RDI,%RSI,8),%XMM21,%XMM23 |
(1797) 0x471405 VMOVSD %XMM23,(%RDI,%RSI,8) |
(1797) 0x47140c CMP %R12,%RDX |
(1797) 0x47140f JNE 471443 |
(1797) 0x471411 MOV 0x40(%RSP),%RDX |
(1797) 0x471416 VMOVSD 0x28(%RDX,%RAX,8),%XMM23 |
(1797) 0x47141e VMULSD %XMM22,%XMM23,%XMM24 |
(1797) 0x471424 VMULSD %XMM21,%XMM23,%XMM23 |
(1797) 0x47142a VCMPSD $0x1,%XMM5,%XMM24,%K1 |
(1797) 0x471431 VMOVAPD %XMM6,%XMM24 |
(1797) 0x471437 VMOVSD %XMM23,%XMM24,%XMM24{%K1} |
(1797) 0x47143d VADDSD %XMM17,%XMM24,%XMM17 |
(1797) 0x471443 MOV 0xc8(%RSP),%RDX |
(1797) 0x47144b MOV 0x30(%RDX,%RAX,8),%RDX |
(1797) 0x471450 MOV (%RBX,%RDX,8),%RSI |
(1797) 0x471454 CMP %R10,%RSI |
(1797) 0x471457 JL 47148a |
(1797) 0x471459 MOV 0x40(%RSP),%RDI |
(1797) 0x47145e VMOVSD 0x30(%RDI,%RAX,8),%XMM23 |
(1797) 0x471466 VMULSD %XMM22,%XMM23,%XMM24 |
(1797) 0x47146c VUCOMISD %XMM5,%XMM24 |
(1797) 0x471472 JAE 47148a |
(1797) 0x471474 MOV 0x38(%RSP),%RDI |
(1797) 0x471479 MOV (%RDI),%RDI |
(1797) 0x47147c VFMADD213SD (%RDI,%RSI,8),%XMM21,%XMM23 |
(1797) 0x471483 VMOVSD %XMM23,(%RDI,%RSI,8) |
(1797) 0x47148a CMP %R12,%RDX |
(1797) 0x47148d JNE 4714c1 |
(1797) 0x47148f MOV 0x40(%RSP),%RDX |
(1797) 0x471494 VMOVSD 0x30(%RDX,%RAX,8),%XMM23 |
(1797) 0x47149c VMULSD %XMM22,%XMM23,%XMM24 |
(1797) 0x4714a2 VMULSD %XMM21,%XMM23,%XMM23 |
(1797) 0x4714a8 VCMPSD $0x1,%XMM5,%XMM24,%K1 |
(1797) 0x4714af VMOVAPD %XMM6,%XMM24 |
(1797) 0x4714b5 VMOVSD %XMM23,%XMM24,%XMM24{%K1} |
(1797) 0x4714bb VADDSD %XMM17,%XMM24,%XMM17 |
(1797) 0x4714c1 MOV 0xc8(%RSP),%RDX |
(1797) 0x4714c9 MOV 0x38(%RDX,%RAX,8),%RDX |
(1797) 0x4714ce MOV (%RBX,%RDX,8),%RSI |
(1797) 0x4714d2 CMP %R10,%RSI |
(1797) 0x4714d5 JL 471508 |
(1797) 0x4714d7 MOV 0x40(%RSP),%RDI |
(1797) 0x4714dc VMOVSD 0x38(%RDI,%RAX,8),%XMM23 |
(1797) 0x4714e4 VMULSD %XMM22,%XMM23,%XMM24 |
(1797) 0x4714ea VUCOMISD %XMM5,%XMM24 |
(1797) 0x4714f0 JAE 471508 |
(1797) 0x4714f2 MOV 0x38(%RSP),%RDI |
(1797) 0x4714f7 MOV (%RDI),%RDI |
(1797) 0x4714fa VFMADD213SD (%RDI,%RSI,8),%XMM21,%XMM23 |
(1797) 0x471501 VMOVSD %XMM23,(%RDI,%RSI,8) |
(1797) 0x471508 CMP %R12,%RDX |
(1797) 0x47150b JNE 471140 |
(1797) 0x471511 MOV 0x40(%RSP),%RDX |
(1797) 0x471516 VMOVSD 0x38(%RDX,%RAX,8),%XMM23 |
(1797) 0x47151e VMULSD %XMM22,%XMM23,%XMM24 |
(1797) 0x471524 VMULSD %XMM21,%XMM23,%XMM23 |
(1797) 0x47152a VCMPSD $0x1,%XMM5,%XMM24,%K1 |
(1797) 0x471531 VMOVAPD %XMM6,%XMM24 |
(1797) 0x471537 VMOVSD %XMM23,%XMM24,%XMM24{%K1} |
(1797) 0x47153d VADDSD %XMM17,%XMM24,%XMM17 |
(1797) 0x471543 JMP 471140 |
(1795) 0x471550 INC %RAX |
(1795) 0x471553 CMP %RAX,%RCX |
(1795) 0x471556 JE 470bc2 |
(1795) 0x47155c MOV 0x108(%RSP),%RDX |
(1795) 0x471564 MOV 0x20(%RSP),%RSI |
(1795) 0x471569 MOV (%RDX,%RAX,8),%RDX |
(1795) 0x47156d MOV (%RSI,%RDX,8),%RDX |
(1795) 0x471571 CMP %R8,%RDX |
(1795) 0x471574 JL 471550 |
(1795) 0x471576 MOV 0xd8(%RSP),%RSI |
(1795) 0x47157e VMOVSD (%RSI,%RAX,8),%XMM23 |
(1795) 0x471585 VMULSD %XMM22,%XMM23,%XMM24 |
(1795) 0x47158b VUCOMISD %XMM5,%XMM24 |
(1795) 0x471591 JAE 471550 |
(1795) 0x471593 MOV 0x58(%RSP),%RSI |
(1795) 0x471598 MOV (%RSI),%RSI |
(1795) 0x47159b VFMADD213SD (%RSI,%RDX,8),%XMM21,%XMM23 |
(1795) 0x4715a2 VMOVSD %XMM23,(%RSI,%RDX,8) |
(1795) 0x4715a9 JMP 471550 |
0x4715ab MOV %R15,%R13 |
0x4715ae MOV 0xe0(%RSP),%RSI |
0x4715b6 MOV 0x30(%RSP),%R15 |
0x4715bb VPBROADCASTQ %RDX,%ZMM23 |
0x4715c1 VBROADCASTSD %XMM22,%ZMM22 |
0x4715c7 XOR %EDX,%EDX |
0x4715c9 JMP 471601 |
0x4715cb VPBROADCASTQ %RDX,%ZMM23 |
0x4715d1 MOV %RSI,%RDX |
0x4715d4 MOV 0x98(%RSP),%R11 |
0x4715dc MOV 0x30(%RSP),%R15 |
0x4715e1 MOV 0xf0(%RSP),%R13 |
0x4715e9 MOV 0xd0(%RSP),%R9 |
0x4715f1 MOV 0xf8(%RSP),%RDI |
0x4715f9 MOV 0xe0(%RSP),%RSI |
0x471601 ADD %RDX,%RSI |
0x471604 VPBROADCASTQ %RDX,%ZMM24 |
0x47160a MOV %RSI,%RDX |
0x47160d MOV 0x108(%RSP),%RSI |
0x471615 VPSUBQ %ZMM24,%ZMM23,%ZMM23 |
0x47161b VPXORD %XMM24,%XMM24,%XMM24 |
0x471621 VPCMPNLEUQ %ZMM3,%ZMM23,%K1 |
0x471628 KMOVQ %K1,%K2 |
0x47162d VMOVDQU64 (%RSI,%RDX,8),%ZMM23{%K1}{z} |
0x471634 MOV 0xd8(%RSP),%RSI |
0x47163c VMOVDQA64 %ZMM23,%ZMM15{%K1} |
0x471642 VPSLLQ $0x3,%ZMM15,%ZMM23 |
0x471649 VPADDQ %ZMM23,%ZMM1,%ZMM23 |
0x47164f VPGATHERQQ (,%ZMM23,1),%ZMM24{%K2} |
0x47165a VMOVDQA64 %ZMM24,%ZMM14{%K1} |
0x471660 VPCMPNLTQ %ZMM20,%ZMM14,%K1{%K1} |
0x471667 VMOVUPD (%RSI,%RDX,8),%ZMM23{%K1}{z} |
0x47166e VMOVAPD %ZMM23,%ZMM11{%K1} |
0x471674 VMULPD %ZMM22,%ZMM11,%ZMM22 |
0x47167a VCMPPD $0x1,%ZMM4,%ZMM22,%K2 |
0x471681 VBLENDMPD %ZMM11,%ZMM2,%ZMM22{%K2} |
0x471687 VMOVAPD %ZMM22,%ZMM22{%K1}{z} |
0x47168d VEXTRACTF64X4 $0x1,%ZMM22,%YMM23 |
0x471694 VADDPD %ZMM23,%ZMM22,%ZMM22 |
0x47169a VEXTRACTF32X4 $0x1,%YMM22,%XMM23 |
0x4716a1 VADDPD %XMM23,%XMM22,%XMM22 |
0x4716a7 VPERMILPD $0x1,%XMM22,%XMM23 |
0x4716ae VADDSD %XMM23,%XMM22,%XMM22 |
0x4716b4 VADDSD %XMM22,%XMM21,%XMM21 |
0x4716ba MOV 0x40(%RSP),%RDX |
0x4716bf VUCOMISD %XMM5,%XMM21 |
0x4716c5 VMOVSD (%RDX,%R14,8),%XMM22 |
0x4716cc JNE 471001 |
0x4716d2 VADDSD %XMM17,%XMM22,%XMM17 |
0x4716d8 JMP 470bc2 |
0x4716dd LEA 0x18(,%RAX,8),%RDI |
0x4716e5 MOV %RSI,%RDX |
0x4716e8 SHR $0x2,%RDX |
0x4716ec JMP 4716fd |
(1796) 0x4716f0 ADD $0x20,%RDI |
(1796) 0x4716f4 DEC %RDX |
(1796) 0x4716f7 JE 471080 |
(1796) 0x4716fd MOV 0x108(%RSP),%R9 |
(1796) 0x471705 MOV 0x20(%RSP),%R13 |
(1796) 0x47170a MOV -0x18(%R9,%RDI,1),%R9 |
(1796) 0x47170f MOV (%R13,%R9,8),%R9 |
(1796) 0x471714 CMP %R8,%R9 |
(1796) 0x471717 JL 471755 |
(1796) 0x471719 MOV 0xd8(%RSP),%R13 |
(1796) 0x471721 VMOVSD -0x18(%R13,%RDI,1),%XMM23 |
(1796) 0x471729 VMULSD %XMM22,%XMM23,%XMM24 |
(1796) 0x47172f VUCOMISD %XMM5,%XMM24 |
(1796) 0x471735 JAE 471755 |
(1796) 0x471737 MOV 0x58(%RSP),%R11 |
(1796) 0x47173c MOV (%R11),%R11 |
(1796) 0x47173f VFMADD213SD (%R11,%R9,8),%XMM21,%XMM23 |
(1796) 0x471746 VMOVSD %XMM23,(%R11,%R9,8) |
(1796) 0x47174d MOV 0x98(%RSP),%R11 |
(1796) 0x471755 MOV 0x108(%RSP),%R9 |
(1796) 0x47175d MOV 0x20(%RSP),%R13 |
(1796) 0x471762 MOV -0x10(%R9,%RDI,1),%R9 |
(1796) 0x471767 MOV (%R13,%R9,8),%R9 |
(1796) 0x47176c CMP %R8,%R9 |
(1796) 0x47176f JL 4717ad |
(1796) 0x471771 MOV 0xd8(%RSP),%R13 |
(1796) 0x471779 VMOVSD -0x10(%R13,%RDI,1),%XMM23 |
(1796) 0x471781 VMULSD %XMM22,%XMM23,%XMM24 |
(1796) 0x471787 VUCOMISD %XMM5,%XMM24 |
(1796) 0x47178d JAE 4717ad |
(1796) 0x47178f MOV 0x58(%RSP),%R11 |
(1796) 0x471794 MOV (%R11),%R11 |
(1796) 0x471797 VFMADD213SD (%R11,%R9,8),%XMM21,%XMM23 |
(1796) 0x47179e VMOVSD %XMM23,(%R11,%R9,8) |
(1796) 0x4717a5 MOV 0x98(%RSP),%R11 |
(1796) 0x4717ad MOV 0x108(%RSP),%R9 |
(1796) 0x4717b5 MOV 0x20(%RSP),%R13 |
(1796) 0x4717ba MOV -0x8(%R9,%RDI,1),%R9 |
(1796) 0x4717bf MOV (%R13,%R9,8),%R9 |
(1796) 0x4717c4 CMP %R8,%R9 |
(1796) 0x4717c7 JL 471805 |
(1796) 0x4717c9 MOV 0xd8(%RSP),%R13 |
(1796) 0x4717d1 VMOVSD -0x8(%R13,%RDI,1),%XMM23 |
(1796) 0x4717d9 VMULSD %XMM22,%XMM23,%XMM24 |
(1796) 0x4717df VUCOMISD %XMM5,%XMM24 |
(1796) 0x4717e5 JAE 471805 |
(1796) 0x4717e7 MOV 0x58(%RSP),%R11 |
(1796) 0x4717ec MOV (%R11),%R11 |
(1796) 0x4717ef VFMADD213SD (%R11,%R9,8),%XMM21,%XMM23 |
(1796) 0x4717f6 VMOVSD %XMM23,(%R11,%R9,8) |
(1796) 0x4717fd MOV 0x98(%RSP),%R11 |
(1796) 0x471805 MOV 0x108(%RSP),%R9 |
(1796) 0x47180d MOV 0x20(%RSP),%R13 |
(1796) 0x471812 MOV (%R9,%RDI,1),%R9 |
(1796) 0x471816 MOV (%R13,%R9,8),%R9 |
(1796) 0x47181b CMP %R8,%R9 |
(1796) 0x47181e JL 4716f0 |
(1796) 0x471824 MOV 0xd8(%RSP),%R13 |
(1796) 0x47182c VMOVSD (%R13,%RDI,1),%XMM23 |
(1796) 0x471834 VMULSD %XMM22,%XMM23,%XMM24 |
(1796) 0x47183a VUCOMISD %XMM5,%XMM24 |
(1796) 0x471840 JAE 4716f0 |
(1796) 0x471846 MOV 0x58(%RSP),%R11 |
(1796) 0x47184b MOV (%R11),%R11 |
(1796) 0x47184e VFMADD213SD (%R11,%R9,8),%XMM21,%XMM23 |
(1796) 0x471855 VMOVSD %XMM23,(%R11,%R9,8) |
(1796) 0x47185c MOV 0x98(%RSP),%R11 |
(1796) 0x471864 JMP 4716f0 |
0x471869 MOV 0xf0(%RSP),%R13 |
0x471871 JMP 470bc2 |
/home/eoseret/qaas_runs_CPU_9468/172-019-1763/intel/AMG/build/AMG/AMG/parcsr_ls/par_lr_interp.c: 1221 - 1675 |
-------------------------------------------------------------------------------- |
1221: if (n_fine) |
[...] |
1609: for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++) |
1610: { /* i1 is a c-point and strongly influences i, accumulate |
1611: * a_(i,i1) into interpolation weight */ |
1612: i1 = A_diag_j[jj]; |
1613: if (P_marker[i1] >= jj_begin_row) |
1614: { |
1615: P_diag_data[P_marker[i1]] += A_diag_data[jj]; |
1616: } |
1617: else if(P_marker[i1] == strong_f_marker) |
1618: { |
1619: sum = zero; |
1620: sgn = 1; |
1621: if(A_diag_data[A_diag_i[i1]] < 0) sgn = -1; |
1622: /* Loop over row of A for point i1 and calculate the sum |
1623: * of the connections to c-points that strongly influence i. */ |
1624: for(jj1 = A_diag_i[i1]+1; jj1 < A_diag_i[i1+1]; jj1++) |
1625: { |
1626: i2 = A_diag_j[jj1]; |
1627: if((P_marker[i2] >= jj_begin_row || i2 == i) && (sgn*A_diag_data[jj1]) < 0) |
1628: sum += A_diag_data[jj1]; |
1629: } |
1630: if(num_procs > 1) |
1631: { |
1632: for(jj1 = A_offd_i[i1]; jj1< A_offd_i[i1+1]; jj1++) |
1633: { |
1634: i2 = A_offd_j[jj1]; |
1635: if(P_marker_offd[i2] >= jj_begin_row_offd && |
1636: (sgn*A_offd_data[jj1]) < 0) |
1637: sum += A_offd_data[jj1]; |
1638: } |
1639: } |
1640: if(sum != 0) |
1641: { |
1642: distribute = A_diag_data[jj]/sum; |
1643: /* Loop over row of A for point i1 and do the distribution */ |
1644: for(jj1 = A_diag_i[i1]+1; jj1 < A_diag_i[i1+1]; jj1++) |
1645: { |
1646: i2 = A_diag_j[jj1]; |
1647: if(P_marker[i2] >= jj_begin_row && (sgn*A_diag_data[jj1]) < 0) |
1648: P_diag_data[P_marker[i2]] += |
1649: distribute*A_diag_data[jj1]; |
1650: if(i2 == i && (sgn*A_diag_data[jj1]) < 0) |
1651: diagonal += distribute*A_diag_data[jj1]; |
1652: } |
1653: if(num_procs > 1) |
1654: { |
1655: for(jj1 = A_offd_i[i1]; jj1 < A_offd_i[i1+1]; jj1++) |
1656: { |
1657: i2 = A_offd_j[jj1]; |
1658: if(P_marker_offd[i2] >= jj_begin_row_offd && |
1659: (sgn*A_offd_data[jj1]) < 0) |
1660: P_offd_data[P_marker_offd[i2]] += |
[...] |
1667: diagonal += A_diag_data[jj]; |
1668: } |
1669: } |
1670: /* neighbor i1 weakly influences i, accumulate a_(i,i1) into |
1671: * diagonal */ |
1672: else if (CF_marker[i1] != -3) |
1673: { |
1674: if(num_functions == 1 || dof_func[i] == dof_func[i1]) |
1675: diagonal += A_diag_data[jj]; |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►100.00+ | __kmp_invoke_microtask | libiomp5.so | |
○ | __kmp_invoke_task_func | libiomp5.so |
Path / |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.41 |
CQA speedup if FP arith vectorized | 1.05 |
CQA speedup if fully vectorized | 1.35 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.97 |
Bottlenecks | micro-operation queue, |
Function | hypre_BoomerAMGBuildExtPIInterp.extracted |
Source | par_lr_interp.c:1221-1221,par_lr_interp.c:1609-1609,par_lr_interp.c:1612-1617,par_lr_interp.c:1621-1621,par_lr_interp.c:1624-1627,par_lr_interp.c:1630-1636,par_lr_interp.c:1640-1644,par_lr_interp.c:1647-1647,par_lr_interp.c:1655-1655,par_lr_interp.c:1659-1660,par_lr_interp.c:1667-1667,par_lr_interp.c:1672-1675 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 61.17 |
CQA cycles if no scalar integer | 43.50 |
CQA cycles if FP arith vectorized | 58.44 |
CQA cycles if fully vectorized | 45.24 |
Front-end cycles | 61.17 |
DIV/SQRT cycles | 16.00 |
P0 cycles | 14.00 |
P1 cycles | 14.00 |
P2 cycles | 14.00 |
P3 cycles | 16.00 |
P4 cycles | 31.00 |
P5 cycles | 31.00 |
P6 cycles | 31.00 |
P7 cycles | 28.25 |
P8 cycles | 26.17 |
P9 cycles | 27.83 |
P10 cycles | 28.75 |
P11 cycles | 12.50 |
P12 cycles | 12.50 |
P13 cycles | 5.00 |
Inter-iter dependencies cycles | NA |
FE+BE cycles (UFS) | NA |
Stall cycles (UFS) | NA |
Nb insns | 264.00 |
Nb uops | 367.00 |
Nb loads | 80.00 |
Nb stores | 10.00 |
Nb stack references | 24.00 |
FLOP/cycle | 1.10 |
Nb FLOP add-sub | 50.00 |
Nb FLOP mul | 16.00 |
Nb FLOP fma | 0.00 |
Nb FLOP div | 1.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 17.26 |
Bytes prefetched | 0.00 |
Bytes loaded | 976.00 |
Bytes stored | 80.00 |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | 44.88 |
Vectorization ratio load | 23.08 |
Vectorization ratio store | 0.00 |
Vectorization ratio mul | 100.00 |
Vectorization ratio add_sub | 54.55 |
Vectorization ratio fma | NA |
Vectorization ratio div_sqrt | 0.00 |
Vectorization ratio other | 51.32 |
Vector-efficiency ratio all | 39.52 |
Vector-efficiency ratio load | 32.69 |
Vector-efficiency ratio store | 12.50 |
Vector-efficiency ratio mul | 100.00 |
Vector-efficiency ratio add_sub | 46.59 |
Vector-efficiency ratio fma | NA |
Vector-efficiency ratio div_sqrt | 12.50 |
Vector-efficiency ratio other | 40.87 |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.41 |
CQA speedup if FP arith vectorized | 1.05 |
CQA speedup if fully vectorized | 1.35 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.97 |
Bottlenecks | micro-operation queue, |
Function | hypre_BoomerAMGBuildExtPIInterp.extracted |
Source | par_lr_interp.c:1221-1221,par_lr_interp.c:1609-1609,par_lr_interp.c:1612-1617,par_lr_interp.c:1621-1621,par_lr_interp.c:1624-1627,par_lr_interp.c:1630-1636,par_lr_interp.c:1640-1644,par_lr_interp.c:1647-1647,par_lr_interp.c:1655-1655,par_lr_interp.c:1659-1660,par_lr_interp.c:1667-1667,par_lr_interp.c:1672-1675 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 61.17 |
CQA cycles if no scalar integer | 43.50 |
CQA cycles if FP arith vectorized | 58.44 |
CQA cycles if fully vectorized | 45.24 |
Front-end cycles | 61.17 |
DIV/SQRT cycles | 16.00 |
P0 cycles | 14.00 |
P1 cycles | 14.00 |
P2 cycles | 14.00 |
P3 cycles | 16.00 |
P4 cycles | 31.00 |
P5 cycles | 31.00 |
P6 cycles | 31.00 |
P7 cycles | 28.25 |
P8 cycles | 26.17 |
P9 cycles | 27.83 |
P10 cycles | 28.75 |
P11 cycles | 12.50 |
P12 cycles | 12.50 |
P13 cycles | 5.00 |
Inter-iter dependencies cycles | NA |
FE+BE cycles (UFS) | NA |
Stall cycles (UFS) | NA |
Nb insns | 264.00 |
Nb uops | 367.00 |
Nb loads | 80.00 |
Nb stores | 10.00 |
Nb stack references | 24.00 |
FLOP/cycle | 1.10 |
Nb FLOP add-sub | 50.00 |
Nb FLOP mul | 16.00 |
Nb FLOP fma | 0.00 |
Nb FLOP div | 1.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 17.26 |
Bytes prefetched | 0.00 |
Bytes loaded | 976.00 |
Bytes stored | 80.00 |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | 44.88 |
Vectorization ratio load | 23.08 |
Vectorization ratio store | 0.00 |
Vectorization ratio mul | 100.00 |
Vectorization ratio add_sub | 54.55 |
Vectorization ratio fma | NA |
Vectorization ratio div_sqrt | 0.00 |
Vectorization ratio other | 51.32 |
Vector-efficiency ratio all | 39.52 |
Vector-efficiency ratio load | 32.69 |
Vector-efficiency ratio store | 12.50 |
Vector-efficiency ratio mul | 100.00 |
Vector-efficiency ratio add_sub | 46.59 |
Vector-efficiency ratio fma | NA |
Vector-efficiency ratio div_sqrt | 12.50 |
Vector-efficiency ratio other | 40.87 |
Path / |
Function | hypre_BoomerAMGBuildExtPIInterp.extracted |
Source file and lines | par_lr_interp.c:1221-1675 |
Module | exec |
nb instructions | 264 |
nb uops | 367 |
loop length | 1472 |
used x86 registers | 14 |
used mmx registers | 0 |
used xmm registers | 7 |
used ymm registers | 4 |
used zmm registers | 18 |
nb stack references | 24 |
ADD-SUB / MUL ratio | 9.00 |
micro-operation queue | 61.17 cycles |
front end | 61.17 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 16.00 | 14.00 | 14.00 | 14.00 | 16.00 | 29.67 | 29.67 | 29.67 | 26.25 | 26.17 | 26.33 | 26.25 | 12.50 | 12.50 |
cycles | 16.00 | 14.00 | 14.00 | 14.00 | 16.00 | 31.00 | 31.00 | 31.00 | 28.25 | 26.17 | 27.83 | 28.75 | 12.50 | 12.50 |
Cycles executing div or sqrt instructions | 5.00 |
Front-end | 61.17 |
Dispatch | 31.00 |
DIV/SQRT | 5.00 |
Overall L1 | 61.17 |
all | 31% |
load | 23% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 100% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 34% |
all | 60% |
load | 22% |
store | 0% |
mul | 100% |
add-sub | 44% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 75% |
all | 44% |
load | 23% |
store | 0% |
mul | 100% |
add-sub | 54% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 51% |
all | 37% |
load | 33% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 100% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 38% |
all | 41% |
load | 31% |
store | 12% |
mul | 100% |
add-sub | 34% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 12% |
other | 43% |
all | 39% |
load | 32% |
store | 12% |
mul | 100% |
add-sub | 46% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 12% |
other | 40% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput | Vectorization |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
MOV 0x38(%RSP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x40(%RSP),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV (%RCX),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VMOVSD (%RCX,%RAX,8),%XMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
VADDSD (%RDX,%R14,8),%XMM21,%XMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
VMOVSD %XMM21,(%RCX,%RAX,8) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 | scal (12.5%) |
INC %R14 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
CMP %R13,%R14 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JE 471876 <hypre_BoomerAMGBuildExtPIInterp.extracted+0x2b26> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV 0xc8(%RSP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV (%RAX,%R14,8),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV (%RBX,%RCX,8),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
CMP %R10,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JGE 470ba0 <hypre_BoomerAMGBuildExtPIInterp.extracted+0x1e50> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
CMP 0x70(%RSP),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
JNE 470d30 <hypre_BoomerAMGBuildExtPIInterp.extracted+0x1fe0> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV 0x210(%RSP),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x40(%RSP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VXORPD %XMM21,%XMM21,%XMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
MOV (%RDX,%RCX,8),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x8(%RDX,%RCX,8),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VUCOMISD (%RSI,%RDI,8),%XMM21 | 2 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0.50 | 6 | 1 | scal (12.5%) |
MOV %RDX,0x68(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
SETBE %AL | 1 | 0.50 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
LEA -0x1(%RAX,%RAX,1),%R9 | 2 | 0 | 0 | 0.50 | 0.50 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | N/A |
LEA 0x1(%RDI),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
CMP %RDX,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JGE 470e73 <hypre_BoomerAMGBuildExtPIInterp.extracted+0x2123> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV %RDI,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
NOT %RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VCVTSI2SD %R9,%XMM26,%XMM21 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1.25 | scal (12.5%) |
ADD %RDX,%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV %RSI,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
AND $-0x8,%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
JE 470d75 <hypre_BoomerAMGBuildExtPIInterp.extracted+0x2025> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV %R9,0xd0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV %RDI,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
VBROADCASTSD %XMM21,%ZMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 1 | scal (12.5%) |
LEA -0x1(%RDX),%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VXORPD %XMM21,%XMM21,%XMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
MOV 0x128(%RSP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
LEA (%R9,%R15,8),%R11 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV 0x170(%RSP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV %R15,0xf8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
LEA (%R9,%R15,8),%R9 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
XOR %R15D,%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
NOPW (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
VEXTRACTF64X4 $0x1,%ZMM21,%YMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (50.0%) |
VADDPD %ZMM23,%ZMM21,%ZMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VEXTRACTF32X4 $0x1,%YMM21,%XMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 4 | 0.25 | vect (25.0%) |
VADDPD %XMM23,%XMM21,%XMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VPERMILPD $0x1,%XMM21,%XMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | vect (25.0%) |
VADDSD %XMM23,%XMM21,%XMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
CMP %RDX,%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JNE 470d8b <hypre_BoomerAMGBuildExtPIInterp.extracted+0x203b> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV 0x98(%RSP),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
MOV 0x30(%RSP),%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0xd0(%RSP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0xf8(%RSP),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
JMP 470e73 <hypre_BoomerAMGBuildExtPIInterp.extracted+0x2123> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
MOV 0xa8(%RSP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
CMPQ $-0x3,(%RAX,%RCX,8) | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
JE 470bc2 <hypre_BoomerAMGBuildExtPIInterp.extracted+0x1e72> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
CMPQ $0x1,0x208(%RSP) | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
JE 470d64 <hypre_BoomerAMGBuildExtPIInterp.extracted+0x2014> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV 0x200(%RSP),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV (%RDX,%R12,8),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
CMP (%RDX,%RCX,8),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
JNE 470bc2 <hypre_BoomerAMGBuildExtPIInterp.extracted+0x1e72> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV 0x40(%RSP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VADDSD (%RAX,%R14,8),%XMM17,%XMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
JMP 470bc2 <hypre_BoomerAMGBuildExtPIInterp.extracted+0x1e72> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
VBROADCASTSD %XMM21,%ZMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 1 | scal (12.5%) |
VPBROADCASTQ %RSI,%ZMM23 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | scal (12.5%) |
VXORPD %XMM21,%XMM21,%XMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
JMP 470dae <hypre_BoomerAMGBuildExtPIInterp.extracted+0x205e> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
MOV 0x98(%RSP),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
MOV 0x30(%RSP),%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0xd0(%RSP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0xf8(%RSP),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VPBROADCASTQ %RSI,%ZMM23 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | scal (12.5%) |
MOV 0xc8(%RSP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VPBROADCASTQ %RDX,%ZMM24 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | scal (12.5%) |
ADD %RAX,%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VPSUBQ %ZMM24,%ZMM23,%ZMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (100.0%) |
VPXORD %XMM24,%XMM24,%XMM24 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | vect (25.0%) |
VPCMPNLEUQ %ZMM3,%ZMM23,%K1 | vect (100.0%) | |||||||||||||||||
KMOVQ %K1,%K2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 1 | 1 | N/A |
VMOVDQU64 (%RSI,%RDX,8),%ZMM23{%K1}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
MOV 0x40(%RSP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VMOVDQA64 %ZMM23,%ZMM12{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VPSLLQ $0x3,%ZMM12,%ZMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 1 | vect (100.0%) |
VPADDQ %ZMM23,%ZMM0,%ZMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (100.0%) |
VPGATHERQQ (,%ZMM23,1),%ZMM24{%K2} | 48 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.75 | 4.58 | 5.08 | 3.58 | 5 | 5 | 0-16 | 9 | vect (100.0%) |
VPCMPEQQ %ZMM18,%ZMM12,%K2{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 1 | vect (100.0%) |
VMOVDQA64 %ZMM24,%ZMM10{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VPCMPNLTQ %ZMM19,%ZMM10,%K0 | vect (100.0%) | |||||||||||||||||
KANDNB %K2,%K0,%K2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | N/A |
KANDB %K0,%K1,%K0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | N/A |
KORB %K2,%K0,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | N/A |
VMOVUPD (%RSI,%RDX,8),%ZMM23{%K1}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMOVAPD %ZMM23,%ZMM9{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VMULPD %ZMM22,%ZMM9,%ZMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VCMPPD $0x1,%ZMM4,%ZMM22,%K2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 1 | vect (100.0%) |
VBLENDMPD %ZMM9,%ZMM2,%ZMM22{%K2} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (100.0%) |
VMOVAPD %ZMM22,%ZMM22{%K1}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VEXTRACTF64X4 $0x1,%ZMM22,%YMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (50.0%) |
VADDPD %ZMM23,%ZMM22,%ZMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VEXTRACTF32X4 $0x1,%YMM22,%XMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 4 | 0.25 | vect (25.0%) |
VADDPD %XMM23,%XMM22,%XMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VPERMILPD $0x1,%XMM22,%XMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | vect (25.0%) |
VADDSD %XMM23,%XMM22,%XMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
VADDSD %XMM22,%XMM21,%XMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
MOV 0x90(%RSP),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV (%RDX),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV %RDX,0x120(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
CMP $0x2,%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JL 470fe9 <hypre_BoomerAMGBuildExtPIInterp.extracted+0x2299> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV 0x178(%RSP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV %R13,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
MOV %R9,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | scal (12.5%) |
MOV (%RSI,%RCX,8),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x8(%RSI,%RCX,8),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
SUB %R9,%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
JLE 470fde <hypre_BoomerAMGBuildExtPIInterp.extracted+0x228e> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
VCVTSI2SD %R13,%XMM26,%XMM22 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1.25 | scal (12.5%) |
MOV %RDX,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
AND $-0x8,%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV %R9,0xe0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV %R13,%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
JE 4715ab <hypre_BoomerAMGBuildExtPIInterp.extracted+0x285b> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV %RDI,0xf8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV %R9,0xd0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV %RSI,0x168(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
LEA -0x1(%RSI),%R11 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VBROADCASTSD %XMM22,%ZMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 1 | scal (12.5%) |
VXORPD %XMM23,%XMM23,%XMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
XOR %R13D,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | scal (6.3%) |
MOV 0xd8(%RSP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0xe0(%RSP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x108(%RSP),%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x20(%RSP),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
LEA (%R9,%RSI,8),%R9 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
LEA (%R15,%RSI,8),%R15 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
XCHG %AX,%AX | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
VEXTRACTF64X4 $0x1,%ZMM23,%YMM24 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (50.0%) |
MOV 0x168(%RSP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VADDPD %ZMM24,%ZMM23,%ZMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VEXTRACTF32X4 $0x1,%YMM23,%XMM24 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 4 | 0.25 | vect (25.0%) |
VADDPD %XMM24,%XMM23,%XMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VPERMILPD $0x1,%XMM23,%XMM24 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | vect (25.0%) |
VADDSD %XMM24,%XMM23,%XMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
VADDSD %XMM23,%XMM21,%XMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
CMP %RSI,%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JNE 4715cb <hypre_BoomerAMGBuildExtPIInterp.extracted+0x287b> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV 0x98(%RSP),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
MOV 0x30(%RSP),%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0xf0(%RSP),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
MOV 0xd0(%RSP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0xf8(%RSP),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x40(%RSP),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VUCOMISD %XMM5,%XMM21 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0.50 | 6 | 1 | scal (12.5%) |
VMOVSD (%RDX,%R14,8),%XMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
JNE 471001 <hypre_BoomerAMGBuildExtPIInterp.extracted+0x22b1> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
JMP 4716d2 <hypre_BoomerAMGBuildExtPIInterp.extracted+0x2982> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
MOV %R13,%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
MOV %R15,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | scal (12.5%) |
MOV 0x30(%RSP),%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x40(%RSP),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VUCOMISD %XMM5,%XMM21 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0.50 | 6 | 1 | scal (12.5%) |
VMOVSD (%RDX,%R14,8),%XMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
JE 4716d2 <hypre_BoomerAMGBuildExtPIInterp.extracted+0x2982> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
VDIVSD %XMM21,%XMM22,%XMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 13 | 5 | scal (12.5%) |
MOV 0x68(%RSP),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
CMP %RDX,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JGE 471044 <hypre_BoomerAMGBuildExtPIInterp.extracted+0x22f4> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV %EDI,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
NOT %ESI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VCVTSI2SD %R9,%XMM26,%XMM22 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1.25 | scal (12.5%) |
MOV %R9,0xd0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
ADD %EDX,%ESI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
SUB %RDI,%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
ADD $-0x2,%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
AND $0x7,%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
JNE 4710a8 <hypre_BoomerAMGBuildExtPIInterp.extracted+0x2358> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV 0xd0(%RSP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
CMP $0x7,%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JAE 471152 <hypre_BoomerAMGBuildExtPIInterp.extracted+0x2402> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
CMPQ $0x2,0x120(%RSP) | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
JL 470bc2 <hypre_BoomerAMGBuildExtPIInterp.extracted+0x1e72> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV 0x178(%RSP),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV (%RDX,%RCX,8),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x8(%RDX,%RCX,8),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV %RCX,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
SUB %RAX,%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
JLE 470bc2 <hypre_BoomerAMGBuildExtPIInterp.extracted+0x1e72> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
VCVTSI2SD %R9,%XMM26,%XMM22 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1.25 | scal (12.5%) |
CMP $0x4,%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JAE 4716dd <hypre_BoomerAMGBuildExtPIInterp.extracted+0x298d> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV %RSI,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
AND $-0x4,%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
CMP %RSI,%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JAE 471869 <hypre_BoomerAMGBuildExtPIInterp.extracted+0x2b19> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV 0xf0(%RSP),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
ADD %RDX,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
JMP 47155c <hypre_BoomerAMGBuildExtPIInterp.extracted+0x280c> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
MOV %R15,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | scal (12.5%) |
MOV 0xe0(%RSP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x30(%RSP),%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VPBROADCASTQ %RDX,%ZMM23 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | scal (12.5%) |
VBROADCASTSD %XMM22,%ZMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 1 | scal (12.5%) |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
JMP 471601 <hypre_BoomerAMGBuildExtPIInterp.extracted+0x28b1> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
VPBROADCASTQ %RDX,%ZMM23 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | scal (12.5%) |
MOV %RSI,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
MOV 0x98(%RSP),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
MOV 0x30(%RSP),%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0xf0(%RSP),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
MOV 0xd0(%RSP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0xf8(%RSP),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0xe0(%RSP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
ADD %RDX,%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VPBROADCASTQ %RDX,%ZMM24 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | scal (12.5%) |
MOV %RSI,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
MOV 0x108(%RSP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VPSUBQ %ZMM24,%ZMM23,%ZMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (100.0%) |
VPXORD %XMM24,%XMM24,%XMM24 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | vect (25.0%) |
VPCMPNLEUQ %ZMM3,%ZMM23,%K1 | vect (100.0%) | |||||||||||||||||
KMOVQ %K1,%K2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 1 | 1 | N/A |
VMOVDQU64 (%RSI,%RDX,8),%ZMM23{%K1}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
MOV 0xd8(%RSP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VMOVDQA64 %ZMM23,%ZMM15{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VPSLLQ $0x3,%ZMM15,%ZMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 1 | vect (100.0%) |
VPADDQ %ZMM23,%ZMM1,%ZMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (100.0%) |
VPGATHERQQ (,%ZMM23,1),%ZMM24{%K2} | 48 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.75 | 4.58 | 5.08 | 3.58 | 5 | 5 | 0-16 | 9 | vect (100.0%) |
VMOVDQA64 %ZMM24,%ZMM14{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VPCMPNLTQ %ZMM20,%ZMM14,%K1{%K1} | vect (100.0%) | |||||||||||||||||
VMOVUPD (%RSI,%RDX,8),%ZMM23{%K1}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMOVAPD %ZMM23,%ZMM11{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VMULPD %ZMM22,%ZMM11,%ZMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VCMPPD $0x1,%ZMM4,%ZMM22,%K2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 1 | vect (100.0%) |
VBLENDMPD %ZMM11,%ZMM2,%ZMM22{%K2} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (100.0%) |
VMOVAPD %ZMM22,%ZMM22{%K1}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VEXTRACTF64X4 $0x1,%ZMM22,%YMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (50.0%) |
VADDPD %ZMM23,%ZMM22,%ZMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VEXTRACTF32X4 $0x1,%YMM22,%XMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 4 | 0.25 | vect (25.0%) |
VADDPD %XMM23,%XMM22,%XMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VPERMILPD $0x1,%XMM22,%XMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | vect (25.0%) |
VADDSD %XMM23,%XMM22,%XMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
VADDSD %XMM22,%XMM21,%XMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
MOV 0x40(%RSP),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VUCOMISD %XMM5,%XMM21 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0.50 | 6 | 1 | scal (12.5%) |
VMOVSD (%RDX,%R14,8),%XMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
JNE 471001 <hypre_BoomerAMGBuildExtPIInterp.extracted+0x22b1> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
VADDSD %XMM17,%XMM22,%XMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
JMP 470bc2 <hypre_BoomerAMGBuildExtPIInterp.extracted+0x1e72> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
LEA 0x18(,%RAX,8),%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV %RSI,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
SHR $0x2,%RDX | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
JMP 4716fd <hypre_BoomerAMGBuildExtPIInterp.extracted+0x29ad> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
MOV 0xf0(%RSP),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
JMP 470bc2 <hypre_BoomerAMGBuildExtPIInterp.extracted+0x1e72> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
Function | hypre_BoomerAMGBuildExtPIInterp.extracted |
Source file and lines | par_lr_interp.c:1221-1675 |
Module | exec |
nb instructions | 264 |
nb uops | 367 |
loop length | 1472 |
used x86 registers | 14 |
used mmx registers | 0 |
used xmm registers | 7 |
used ymm registers | 4 |
used zmm registers | 18 |
nb stack references | 24 |
ADD-SUB / MUL ratio | 9.00 |
micro-operation queue | 61.17 cycles |
front end | 61.17 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 16.00 | 14.00 | 14.00 | 14.00 | 16.00 | 29.67 | 29.67 | 29.67 | 26.25 | 26.17 | 26.33 | 26.25 | 12.50 | 12.50 |
cycles | 16.00 | 14.00 | 14.00 | 14.00 | 16.00 | 31.00 | 31.00 | 31.00 | 28.25 | 26.17 | 27.83 | 28.75 | 12.50 | 12.50 |
Cycles executing div or sqrt instructions | 5.00 |
Front-end | 61.17 |
Dispatch | 31.00 |
DIV/SQRT | 5.00 |
Overall L1 | 61.17 |
all | 31% |
load | 23% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 100% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 34% |
all | 60% |
load | 22% |
store | 0% |
mul | 100% |
add-sub | 44% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 75% |
all | 44% |
load | 23% |
store | 0% |
mul | 100% |
add-sub | 54% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 51% |
all | 37% |
load | 33% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 100% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 38% |
all | 41% |
load | 31% |
store | 12% |
mul | 100% |
add-sub | 34% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 12% |
other | 43% |
all | 39% |
load | 32% |
store | 12% |
mul | 100% |
add-sub | 46% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 12% |
other | 40% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput | Vectorization |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
MOV 0x38(%RSP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x40(%RSP),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV (%RCX),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VMOVSD (%RCX,%RAX,8),%XMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
VADDSD (%RDX,%R14,8),%XMM21,%XMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
VMOVSD %XMM21,(%RCX,%RAX,8) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 | scal (12.5%) |
INC %R14 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
CMP %R13,%R14 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JE 471876 <hypre_BoomerAMGBuildExtPIInterp.extracted+0x2b26> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV 0xc8(%RSP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV (%RAX,%R14,8),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV (%RBX,%RCX,8),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
CMP %R10,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JGE 470ba0 <hypre_BoomerAMGBuildExtPIInterp.extracted+0x1e50> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
CMP 0x70(%RSP),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
JNE 470d30 <hypre_BoomerAMGBuildExtPIInterp.extracted+0x1fe0> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV 0x210(%RSP),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x40(%RSP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VXORPD %XMM21,%XMM21,%XMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
MOV (%RDX,%RCX,8),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x8(%RDX,%RCX,8),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VUCOMISD (%RSI,%RDI,8),%XMM21 | 2 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0.50 | 6 | 1 | scal (12.5%) |
MOV %RDX,0x68(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
SETBE %AL | 1 | 0.50 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
LEA -0x1(%RAX,%RAX,1),%R9 | 2 | 0 | 0 | 0.50 | 0.50 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | N/A |
LEA 0x1(%RDI),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
CMP %RDX,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JGE 470e73 <hypre_BoomerAMGBuildExtPIInterp.extracted+0x2123> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV %RDI,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
NOT %RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VCVTSI2SD %R9,%XMM26,%XMM21 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1.25 | scal (12.5%) |
ADD %RDX,%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV %RSI,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
AND $-0x8,%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
JE 470d75 <hypre_BoomerAMGBuildExtPIInterp.extracted+0x2025> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV %R9,0xd0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV %RDI,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
VBROADCASTSD %XMM21,%ZMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 1 | scal (12.5%) |
LEA -0x1(%RDX),%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VXORPD %XMM21,%XMM21,%XMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
MOV 0x128(%RSP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
LEA (%R9,%R15,8),%R11 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV 0x170(%RSP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV %R15,0xf8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
LEA (%R9,%R15,8),%R9 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
XOR %R15D,%R15D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
NOPW (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
VEXTRACTF64X4 $0x1,%ZMM21,%YMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (50.0%) |
VADDPD %ZMM23,%ZMM21,%ZMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VEXTRACTF32X4 $0x1,%YMM21,%XMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 4 | 0.25 | vect (25.0%) |
VADDPD %XMM23,%XMM21,%XMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VPERMILPD $0x1,%XMM21,%XMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | vect (25.0%) |
VADDSD %XMM23,%XMM21,%XMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
CMP %RDX,%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JNE 470d8b <hypre_BoomerAMGBuildExtPIInterp.extracted+0x203b> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV 0x98(%RSP),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
MOV 0x30(%RSP),%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0xd0(%RSP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0xf8(%RSP),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
JMP 470e73 <hypre_BoomerAMGBuildExtPIInterp.extracted+0x2123> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
MOV 0xa8(%RSP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
CMPQ $-0x3,(%RAX,%RCX,8) | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
JE 470bc2 <hypre_BoomerAMGBuildExtPIInterp.extracted+0x1e72> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
CMPQ $0x1,0x208(%RSP) | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
JE 470d64 <hypre_BoomerAMGBuildExtPIInterp.extracted+0x2014> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV 0x200(%RSP),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV (%RDX,%R12,8),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
CMP (%RDX,%RCX,8),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
JNE 470bc2 <hypre_BoomerAMGBuildExtPIInterp.extracted+0x1e72> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV 0x40(%RSP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VADDSD (%RAX,%R14,8),%XMM17,%XMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
JMP 470bc2 <hypre_BoomerAMGBuildExtPIInterp.extracted+0x1e72> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
VBROADCASTSD %XMM21,%ZMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 1 | scal (12.5%) |
VPBROADCASTQ %RSI,%ZMM23 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | scal (12.5%) |
VXORPD %XMM21,%XMM21,%XMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
JMP 470dae <hypre_BoomerAMGBuildExtPIInterp.extracted+0x205e> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
MOV 0x98(%RSP),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
MOV 0x30(%RSP),%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0xd0(%RSP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0xf8(%RSP),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VPBROADCASTQ %RSI,%ZMM23 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | scal (12.5%) |
MOV 0xc8(%RSP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VPBROADCASTQ %RDX,%ZMM24 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | scal (12.5%) |
ADD %RAX,%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VPSUBQ %ZMM24,%ZMM23,%ZMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (100.0%) |
VPXORD %XMM24,%XMM24,%XMM24 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | vect (25.0%) |
VPCMPNLEUQ %ZMM3,%ZMM23,%K1 | vect (100.0%) | |||||||||||||||||
KMOVQ %K1,%K2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 1 | 1 | N/A |
VMOVDQU64 (%RSI,%RDX,8),%ZMM23{%K1}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
MOV 0x40(%RSP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VMOVDQA64 %ZMM23,%ZMM12{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VPSLLQ $0x3,%ZMM12,%ZMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 1 | vect (100.0%) |
VPADDQ %ZMM23,%ZMM0,%ZMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (100.0%) |
VPGATHERQQ (,%ZMM23,1),%ZMM24{%K2} | 48 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.75 | 4.58 | 5.08 | 3.58 | 5 | 5 | 0-16 | 9 | vect (100.0%) |
VPCMPEQQ %ZMM18,%ZMM12,%K2{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 1 | vect (100.0%) |
VMOVDQA64 %ZMM24,%ZMM10{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VPCMPNLTQ %ZMM19,%ZMM10,%K0 | vect (100.0%) | |||||||||||||||||
KANDNB %K2,%K0,%K2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | N/A |
KANDB %K0,%K1,%K0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | N/A |
KORB %K2,%K0,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 | N/A |
VMOVUPD (%RSI,%RDX,8),%ZMM23{%K1}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMOVAPD %ZMM23,%ZMM9{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VMULPD %ZMM22,%ZMM9,%ZMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VCMPPD $0x1,%ZMM4,%ZMM22,%K2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 1 | vect (100.0%) |
VBLENDMPD %ZMM9,%ZMM2,%ZMM22{%K2} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (100.0%) |
VMOVAPD %ZMM22,%ZMM22{%K1}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VEXTRACTF64X4 $0x1,%ZMM22,%YMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (50.0%) |
VADDPD %ZMM23,%ZMM22,%ZMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VEXTRACTF32X4 $0x1,%YMM22,%XMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 4 | 0.25 | vect (25.0%) |
VADDPD %XMM23,%XMM22,%XMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VPERMILPD $0x1,%XMM22,%XMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | vect (25.0%) |
VADDSD %XMM23,%XMM22,%XMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
VADDSD %XMM22,%XMM21,%XMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
MOV 0x90(%RSP),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV (%RDX),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV %RDX,0x120(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
CMP $0x2,%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JL 470fe9 <hypre_BoomerAMGBuildExtPIInterp.extracted+0x2299> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV 0x178(%RSP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV %R13,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
MOV %R9,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | scal (12.5%) |
MOV (%RSI,%RCX,8),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x8(%RSI,%RCX,8),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
SUB %R9,%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
JLE 470fde <hypre_BoomerAMGBuildExtPIInterp.extracted+0x228e> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
VCVTSI2SD %R13,%XMM26,%XMM22 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1.25 | scal (12.5%) |
MOV %RDX,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
AND $-0x8,%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV %R9,0xe0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV %R13,%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
JE 4715ab <hypre_BoomerAMGBuildExtPIInterp.extracted+0x285b> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV %RDI,0xf8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV %R9,0xd0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV %RSI,0x168(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
LEA -0x1(%RSI),%R11 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VBROADCASTSD %XMM22,%ZMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 1 | scal (12.5%) |
VXORPD %XMM23,%XMM23,%XMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
XOR %R13D,%R13D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | scal (6.3%) |
MOV 0xd8(%RSP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0xe0(%RSP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x108(%RSP),%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x20(%RSP),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
LEA (%R9,%RSI,8),%R9 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
LEA (%R15,%RSI,8),%R15 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
XCHG %AX,%AX | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
VEXTRACTF64X4 $0x1,%ZMM23,%YMM24 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (50.0%) |
MOV 0x168(%RSP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VADDPD %ZMM24,%ZMM23,%ZMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VEXTRACTF32X4 $0x1,%YMM23,%XMM24 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 4 | 0.25 | vect (25.0%) |
VADDPD %XMM24,%XMM23,%XMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VPERMILPD $0x1,%XMM23,%XMM24 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | vect (25.0%) |
VADDSD %XMM24,%XMM23,%XMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
VADDSD %XMM23,%XMM21,%XMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
CMP %RSI,%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JNE 4715cb <hypre_BoomerAMGBuildExtPIInterp.extracted+0x287b> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV 0x98(%RSP),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
MOV 0x30(%RSP),%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0xf0(%RSP),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
MOV 0xd0(%RSP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0xf8(%RSP),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x40(%RSP),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VUCOMISD %XMM5,%XMM21 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0.50 | 6 | 1 | scal (12.5%) |
VMOVSD (%RDX,%R14,8),%XMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
JNE 471001 <hypre_BoomerAMGBuildExtPIInterp.extracted+0x22b1> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
JMP 4716d2 <hypre_BoomerAMGBuildExtPIInterp.extracted+0x2982> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
MOV %R13,%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
MOV %R15,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | scal (12.5%) |
MOV 0x30(%RSP),%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x40(%RSP),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VUCOMISD %XMM5,%XMM21 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0.50 | 6 | 1 | scal (12.5%) |
VMOVSD (%RDX,%R14,8),%XMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
JE 4716d2 <hypre_BoomerAMGBuildExtPIInterp.extracted+0x2982> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
VDIVSD %XMM21,%XMM22,%XMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 13 | 5 | scal (12.5%) |
MOV 0x68(%RSP),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
CMP %RDX,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JGE 471044 <hypre_BoomerAMGBuildExtPIInterp.extracted+0x22f4> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV %EDI,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
NOT %ESI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VCVTSI2SD %R9,%XMM26,%XMM22 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1.25 | scal (12.5%) |
MOV %R9,0xd0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
ADD %EDX,%ESI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
SUB %RDI,%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
ADD $-0x2,%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
AND $0x7,%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
JNE 4710a8 <hypre_BoomerAMGBuildExtPIInterp.extracted+0x2358> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV 0xd0(%RSP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
CMP $0x7,%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JAE 471152 <hypre_BoomerAMGBuildExtPIInterp.extracted+0x2402> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
CMPQ $0x2,0x120(%RSP) | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
JL 470bc2 <hypre_BoomerAMGBuildExtPIInterp.extracted+0x1e72> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV 0x178(%RSP),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV (%RDX,%RCX,8),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x8(%RDX,%RCX,8),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV %RCX,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
SUB %RAX,%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
JLE 470bc2 <hypre_BoomerAMGBuildExtPIInterp.extracted+0x1e72> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
VCVTSI2SD %R9,%XMM26,%XMM22 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1.25 | scal (12.5%) |
CMP $0x4,%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JAE 4716dd <hypre_BoomerAMGBuildExtPIInterp.extracted+0x298d> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV %RSI,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
AND $-0x4,%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
CMP %RSI,%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JAE 471869 <hypre_BoomerAMGBuildExtPIInterp.extracted+0x2b19> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV 0xf0(%RSP),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
ADD %RDX,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
JMP 47155c <hypre_BoomerAMGBuildExtPIInterp.extracted+0x280c> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
MOV %R15,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | scal (12.5%) |
MOV 0xe0(%RSP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x30(%RSP),%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VPBROADCASTQ %RDX,%ZMM23 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | scal (12.5%) |
VBROADCASTSD %XMM22,%ZMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 1 | 1 | scal (12.5%) |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
JMP 471601 <hypre_BoomerAMGBuildExtPIInterp.extracted+0x28b1> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
VPBROADCASTQ %RDX,%ZMM23 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | scal (12.5%) |
MOV %RSI,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
MOV 0x98(%RSP),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
MOV 0x30(%RSP),%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0xf0(%RSP),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
MOV 0xd0(%RSP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0xf8(%RSP),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0xe0(%RSP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
ADD %RDX,%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VPBROADCASTQ %RDX,%ZMM24 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | scal (12.5%) |
MOV %RSI,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
MOV 0x108(%RSP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VPSUBQ %ZMM24,%ZMM23,%ZMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (100.0%) |
VPXORD %XMM24,%XMM24,%XMM24 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | vect (25.0%) |
VPCMPNLEUQ %ZMM3,%ZMM23,%K1 | vect (100.0%) | |||||||||||||||||
KMOVQ %K1,%K2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 1 | 1 | N/A |
VMOVDQU64 (%RSI,%RDX,8),%ZMM23{%K1}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
MOV 0xd8(%RSP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VMOVDQA64 %ZMM23,%ZMM15{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VPSLLQ $0x3,%ZMM15,%ZMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 1 | vect (100.0%) |
VPADDQ %ZMM23,%ZMM1,%ZMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (100.0%) |
VPGATHERQQ (,%ZMM23,1),%ZMM24{%K2} | 48 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.75 | 4.58 | 5.08 | 3.58 | 5 | 5 | 0-16 | 9 | vect (100.0%) |
VMOVDQA64 %ZMM24,%ZMM14{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VPCMPNLTQ %ZMM20,%ZMM14,%K1{%K1} | vect (100.0%) | |||||||||||||||||
VMOVUPD (%RSI,%RDX,8),%ZMM23{%K1}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMOVAPD %ZMM23,%ZMM11{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VMULPD %ZMM22,%ZMM11,%ZMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VCMPPD $0x1,%ZMM4,%ZMM22,%K2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 1 | vect (100.0%) |
VBLENDMPD %ZMM11,%ZMM2,%ZMM22{%K2} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (100.0%) |
VMOVAPD %ZMM22,%ZMM22{%K1}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VEXTRACTF64X4 $0x1,%ZMM22,%YMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (50.0%) |
VADDPD %ZMM23,%ZMM22,%ZMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VEXTRACTF32X4 $0x1,%YMM22,%XMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 4 | 0.25 | vect (25.0%) |
VADDPD %XMM23,%XMM22,%XMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VPERMILPD $0x1,%XMM22,%XMM23 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | vect (25.0%) |
VADDSD %XMM23,%XMM22,%XMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
VADDSD %XMM22,%XMM21,%XMM21 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
MOV 0x40(%RSP),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VUCOMISD %XMM5,%XMM21 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0.50 | 6 | 1 | scal (12.5%) |
VMOVSD (%RDX,%R14,8),%XMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
JNE 471001 <hypre_BoomerAMGBuildExtPIInterp.extracted+0x22b1> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
VADDSD %XMM17,%XMM22,%XMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
JMP 470bc2 <hypre_BoomerAMGBuildExtPIInterp.extracted+0x1e72> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
LEA 0x18(,%RAX,8),%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV %RSI,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
SHR $0x2,%RDX | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
JMP 4716fd <hypre_BoomerAMGBuildExtPIInterp.extracted+0x29ad> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
MOV 0xf0(%RSP),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
JMP 470bc2 <hypre_BoomerAMGBuildExtPIInterp.extracted+0x1e72> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |