Loop Id: 6434 | Module: exec | Source: csr_matvec.c:248-314 [...] | Coverage: 5.65% |
---|
Loop Id: 6434 | Module: exec | Source: csr_matvec.c:248-314 [...] | Coverage: 5.65% |
---|
0x573023 VPBROADCASTQ %R12,%ZMM7 |
0x573029 XOR %R10D,%R10D |
0x57302c MOV 0x10(%RBP),%RAX |
0x573030 VPBROADCASTQ %R10,%ZMM8 |
0x573036 ADD %R10,%R11 |
0x573039 VPSUBQ %ZMM8,%ZMM7,%ZMM7 |
0x57303f VPXOR %XMM8,%XMM8,%XMM8 |
0x573044 VPCMPNLEUQ %ZMM4,%ZMM7,%K1 |
0x57304b KMOVQ %K1,%K2 |
0x573050 VMOVDQU64 (%RAX,%R11,8),%ZMM7{%K1}{z} |
0x573057 MOV -0x30(%RBP),%RAX |
0x57305b VMOVUPD (%RAX,%R11,8),%ZMM9{%K1}{z} |
0x573062 VMOVDQA64 %ZMM7,%ZMM2{%K1} |
0x573068 VPSLLQ $0x3,%ZMM2,%ZMM7 |
0x57306f VPADDQ %ZMM7,%ZMM3,%ZMM7 |
0x573075 VGATHERQPD (,%ZMM7,1),%ZMM8{%K2} |
0x573080 VMOVAPD %ZMM9,%ZMM0{%K1} |
0x573086 VMOVAPD %ZMM8,%ZMM1{%K1} |
0x57308c VXORPD %ZMM5,%ZMM1,%ZMM8 |
0x573092 VMULPD %ZMM8,%ZMM9,%ZMM7{%K1}{z} |
0x573098 VEXTRACTF64X4 $0x1,%ZMM7,%YMM8 |
0x57309f VADDPD %ZMM8,%ZMM7,%ZMM7 |
0x5730a5 VEXTRACTF128 $0x1,%YMM7,%XMM8 |
0x5730ab VADDPD %XMM7,%XMM8,%XMM7 |
0x5730af VPERMILPD $0x1,%XMM7,%XMM8 |
0x5730b5 VADDSD %XMM7,%XMM8,%XMM7 |
0x5730b9 VADDSD %XMM7,%XMM6,%XMM6 |
0x5730bd LEA 0x7(%R8,%R15,1),%RAX |
0x5730c2 ADD $0x8,%R8 |
0x5730c6 VMOVSD %XMM6,(%RDX,%RAX,8) |
0x5730cb CMP %R9,%R8 |
0x5730ce JE 56fb33 |
0x5730d4 LEA (%R15,%R8,1),%R10 |
0x5730d8 VMOVSD (%RSI,%R10,8),%XMM6 |
0x5730de MOV (%RBX,%R10,8),%R11 |
0x5730e2 MOV 0x8(%RBX,%R10,8),%R13 |
0x5730e7 SUB %R11,%R13 |
0x5730ea JLE 573224 |
0x5730f0 MOV %R13,%R12 |
0x5730f3 AND $-0x8,%R12 |
0x5730f7 JE 57317e |
0x5730fd MOV -0x30(%RBP),%RAX |
0x573101 LEA -0x1(%R12),%R14 |
0x573106 VXORPD %XMM7,%XMM7,%XMM7 |
0x57310a LEA (%RAX,%R11,8),%RSI |
0x57310e MOV 0x10(%RBP),%RAX |
0x573112 LEA (%RAX,%R11,8),%RDX |
0x573116 XOR %EAX,%EAX |
0x573118 NOPL (%RAX,%RAX,1) |
(6442) 0x573120 VMOVUPD (%RDX,%RAX,8),%ZMM8 |
(6442) 0x573127 KXNORW %K0,%K0,%K1 |
(6442) 0x57312b VXORPD %XMM9,%XMM9,%XMM9 |
(6442) 0x573130 VGATHERQPD (%RCX,%ZMM8,8),%ZMM9{%K1} |
(6442) 0x573137 VFNMADD231PD (%RSI,%RAX,8),%ZMM9,%ZMM7 |
(6442) 0x57313e ADD $0x8,%RAX |
(6442) 0x573142 CMP %R14,%RAX |
(6442) 0x573145 JBE 573120 |
0x573147 VEXTRACTF64X4 $0x1,%ZMM7,%YMM8 |
0x57314e MOV 0x28(%RBP),%RSI |
0x573152 VADDPD %ZMM8,%ZMM7,%ZMM7 |
0x573158 VEXTRACTF128 $0x1,%YMM7,%XMM8 |
0x57315e VADDPD %XMM7,%XMM8,%XMM7 |
0x573162 VPERMILPD $0x1,%XMM7,%XMM8 |
0x573168 VADDSD %XMM7,%XMM8,%XMM7 |
0x57316c VADDSD %XMM7,%XMM6,%XMM6 |
0x573170 CMP %R12,%R13 |
0x573173 JNE 573189 |
0x573175 MOV 0x30(%RBP),%RDX |
0x573179 JMP 573224 |
0x57317e VPBROADCASTQ %R13,%ZMM7 |
0x573184 XOR %R12D,%R12D |
0x573187 JMP 573193 |
0x573189 MOV 0x30(%RBP),%RDX |
0x57318d VPBROADCASTQ %R13,%ZMM7 |
0x573193 MOV 0x10(%RBP),%RAX |
0x573197 VPBROADCASTQ %R12,%ZMM8 |
0x57319d ADD %R12,%R11 |
0x5731a0 VPSUBQ %ZMM8,%ZMM7,%ZMM7 |
0x5731a6 VPXOR %XMM8,%XMM8,%XMM8 |
0x5731ab VPCMPNLEUQ %ZMM4,%ZMM7,%K1 |
0x5731b2 KMOVQ %K1,%K2 |
0x5731b7 VMOVDQU64 (%RAX,%R11,8),%ZMM7{%K1}{z} |
0x5731be MOV -0x30(%RBP),%RAX |
0x5731c2 VMOVUPD (%RAX,%R11,8),%ZMM9{%K1}{z} |
0x5731c9 VMOVDQA64 %ZMM7,%ZMM2{%K1} |
0x5731cf VPSLLQ $0x3,%ZMM2,%ZMM7 |
0x5731d6 VPADDQ %ZMM7,%ZMM3,%ZMM7 |
0x5731dc VGATHERQPD (,%ZMM7,1),%ZMM8{%K2} |
0x5731e7 VMOVAPD %ZMM9,%ZMM0{%K1} |
0x5731ed VMOVAPD %ZMM8,%ZMM1{%K1} |
0x5731f3 VXORPD %ZMM5,%ZMM1,%ZMM8 |
0x5731f9 VMULPD %ZMM8,%ZMM9,%ZMM7{%K1}{z} |
0x5731ff VEXTRACTF64X4 $0x1,%ZMM7,%YMM8 |
0x573206 VADDPD %ZMM8,%ZMM7,%ZMM7 |
0x57320c VEXTRACTF128 $0x1,%YMM7,%XMM8 |
0x573212 VADDPD %XMM7,%XMM8,%XMM7 |
0x573216 VPERMILPD $0x1,%XMM7,%XMM8 |
0x57321c VADDSD %XMM7,%XMM8,%XMM7 |
0x573220 VADDSD %XMM7,%XMM6,%XMM6 |
0x573224 VMOVSD %XMM6,(%RDX,%R10,8) |
0x57322a VMOVSD 0x8(%RSI,%R10,8),%XMM6 |
0x573231 MOV 0x8(%RBX,%R10,8),%R11 |
0x573236 MOV 0x10(%RBX,%R10,8),%R13 |
0x57323b SUB %R11,%R13 |
0x57323e JLE 573370 |
0x573244 MOV %R13,%R12 |
0x573247 AND $-0x8,%R12 |
0x57324b JE 5732ce |
0x573251 MOV -0x30(%RBP),%RAX |
0x573255 LEA -0x1(%R12),%R14 |
0x57325a VXORPD %XMM7,%XMM7,%XMM7 |
0x57325e LEA (%RAX,%R11,8),%RSI |
0x573262 MOV 0x10(%RBP),%RAX |
0x573266 LEA (%RAX,%R11,8),%RDX |
0x57326a XOR %EAX,%EAX |
0x57326c NOPL (%RAX) |
(6441) 0x573270 VMOVUPD (%RDX,%RAX,8),%ZMM8 |
(6441) 0x573277 KXNORW %K0,%K0,%K1 |
(6441) 0x57327b VXORPD %XMM9,%XMM9,%XMM9 |
(6441) 0x573280 VGATHERQPD (%RCX,%ZMM8,8),%ZMM9{%K1} |
(6441) 0x573287 VFNMADD231PD (%RSI,%RAX,8),%ZMM9,%ZMM7 |
(6441) 0x57328e ADD $0x8,%RAX |
(6441) 0x573292 CMP %R14,%RAX |
(6441) 0x573295 JBE 573270 |
0x573297 VEXTRACTF64X4 $0x1,%ZMM7,%YMM8 |
0x57329e MOV 0x30(%RBP),%RDX |
0x5732a2 MOV 0x28(%RBP),%RSI |
0x5732a6 VADDPD %ZMM8,%ZMM7,%ZMM7 |
0x5732ac VEXTRACTF128 $0x1,%YMM7,%XMM8 |
0x5732b2 VADDPD %XMM7,%XMM8,%XMM7 |
0x5732b6 VPERMILPD $0x1,%XMM7,%XMM8 |
0x5732bc VADDSD %XMM7,%XMM8,%XMM7 |
0x5732c0 VADDSD %XMM7,%XMM6,%XMM6 |
0x5732c4 CMP %R12,%R13 |
0x5732c7 JNE 5732d9 |
0x5732c9 JMP 573370 |
0x5732ce VPBROADCASTQ %R13,%ZMM7 |
0x5732d4 XOR %R12D,%R12D |
0x5732d7 JMP 5732df |
0x5732d9 VPBROADCASTQ %R13,%ZMM7 |
0x5732df MOV 0x10(%RBP),%RAX |
0x5732e3 VPBROADCASTQ %R12,%ZMM8 |
0x5732e9 ADD %R12,%R11 |
0x5732ec VPSUBQ %ZMM8,%ZMM7,%ZMM7 |
0x5732f2 VPXOR %XMM8,%XMM8,%XMM8 |
0x5732f7 VPCMPNLEUQ %ZMM4,%ZMM7,%K1 |
0x5732fe KMOVQ %K1,%K2 |
0x573303 VMOVDQU64 (%RAX,%R11,8),%ZMM7{%K1}{z} |
0x57330a MOV -0x30(%RBP),%RAX |
0x57330e VMOVUPD (%RAX,%R11,8),%ZMM9{%K1}{z} |
0x573315 VMOVDQA64 %ZMM7,%ZMM2{%K1} |
0x57331b VPSLLQ $0x3,%ZMM2,%ZMM7 |
0x573322 VPADDQ %ZMM7,%ZMM3,%ZMM7 |
0x573328 VGATHERQPD (,%ZMM7,1),%ZMM8{%K2} |
0x573333 VMOVAPD %ZMM9,%ZMM0{%K1} |
0x573339 VMOVAPD %ZMM8,%ZMM1{%K1} |
0x57333f VXORPD %ZMM5,%ZMM1,%ZMM8 |
0x573345 VMULPD %ZMM8,%ZMM9,%ZMM7{%K1}{z} |
0x57334b VEXTRACTF64X4 $0x1,%ZMM7,%YMM8 |
0x573352 VADDPD %ZMM8,%ZMM7,%ZMM7 |
0x573358 VEXTRACTF128 $0x1,%YMM7,%XMM8 |
0x57335e VADDPD %XMM7,%XMM8,%XMM7 |
0x573362 VPERMILPD $0x1,%XMM7,%XMM8 |
0x573368 VADDSD %XMM7,%XMM8,%XMM7 |
0x57336c VADDSD %XMM7,%XMM6,%XMM6 |
0x573370 LEA 0x1(%R8,%R15,1),%RAX |
0x573375 VMOVSD %XMM6,(%RDX,%RAX,8) |
0x57337a VMOVSD 0x10(%RSI,%R10,8),%XMM6 |
0x573381 MOV 0x10(%RBX,%R10,8),%R11 |
0x573386 MOV 0x18(%RBX,%R10,8),%R13 |
0x57338b SUB %R11,%R13 |
0x57338e JLE 5734c0 |
0x573394 MOV %R13,%R12 |
0x573397 AND $-0x8,%R12 |
0x57339b JE 57341e |
0x5733a1 MOV -0x30(%RBP),%RAX |
0x5733a5 LEA -0x1(%R12),%R14 |
0x5733aa VXORPD %XMM7,%XMM7,%XMM7 |
0x5733ae LEA (%RAX,%R11,8),%RSI |
0x5733b2 MOV 0x10(%RBP),%RAX |
0x5733b6 LEA (%RAX,%R11,8),%RDX |
0x5733ba XOR %EAX,%EAX |
0x5733bc NOPL (%RAX) |
(6440) 0x5733c0 VMOVUPD (%RDX,%RAX,8),%ZMM8 |
(6440) 0x5733c7 KXNORW %K0,%K0,%K1 |
(6440) 0x5733cb VXORPD %XMM9,%XMM9,%XMM9 |
(6440) 0x5733d0 VGATHERQPD (%RCX,%ZMM8,8),%ZMM9{%K1} |
(6440) 0x5733d7 VFNMADD231PD (%RSI,%RAX,8),%ZMM9,%ZMM7 |
(6440) 0x5733de ADD $0x8,%RAX |
(6440) 0x5733e2 CMP %R14,%RAX |
(6440) 0x5733e5 JBE 5733c0 |
0x5733e7 VEXTRACTF64X4 $0x1,%ZMM7,%YMM8 |
0x5733ee MOV 0x30(%RBP),%RDX |
0x5733f2 MOV 0x28(%RBP),%RSI |
0x5733f6 VADDPD %ZMM8,%ZMM7,%ZMM7 |
0x5733fc VEXTRACTF128 $0x1,%YMM7,%XMM8 |
0x573402 VADDPD %XMM7,%XMM8,%XMM7 |
0x573406 VPERMILPD $0x1,%XMM7,%XMM8 |
0x57340c VADDSD %XMM7,%XMM8,%XMM7 |
0x573410 VADDSD %XMM7,%XMM6,%XMM6 |
0x573414 CMP %R12,%R13 |
0x573417 JNE 573429 |
0x573419 JMP 5734c0 |
0x57341e VPBROADCASTQ %R13,%ZMM7 |
0x573424 XOR %R12D,%R12D |
0x573427 JMP 57342f |
0x573429 VPBROADCASTQ %R13,%ZMM7 |
0x57342f MOV 0x10(%RBP),%RAX |
0x573433 VPBROADCASTQ %R12,%ZMM8 |
0x573439 ADD %R12,%R11 |
0x57343c VPSUBQ %ZMM8,%ZMM7,%ZMM7 |
0x573442 VPXOR %XMM8,%XMM8,%XMM8 |
0x573447 VPCMPNLEUQ %ZMM4,%ZMM7,%K1 |
0x57344e KMOVQ %K1,%K2 |
0x573453 VMOVDQU64 (%RAX,%R11,8),%ZMM7{%K1}{z} |
0x57345a MOV -0x30(%RBP),%RAX |
0x57345e VMOVUPD (%RAX,%R11,8),%ZMM9{%K1}{z} |
0x573465 VMOVDQA64 %ZMM7,%ZMM2{%K1} |
0x57346b VPSLLQ $0x3,%ZMM2,%ZMM7 |
0x573472 VPADDQ %ZMM7,%ZMM3,%ZMM7 |
0x573478 VGATHERQPD (,%ZMM7,1),%ZMM8{%K2} |
0x573483 VMOVAPD %ZMM9,%ZMM0{%K1} |
0x573489 VMOVAPD %ZMM8,%ZMM1{%K1} |
0x57348f VXORPD %ZMM5,%ZMM1,%ZMM8 |
0x573495 VMULPD %ZMM8,%ZMM9,%ZMM7{%K1}{z} |
0x57349b VEXTRACTF64X4 $0x1,%ZMM7,%YMM8 |
0x5734a2 VADDPD %ZMM8,%ZMM7,%ZMM7 |
0x5734a8 VEXTRACTF128 $0x1,%YMM7,%XMM8 |
0x5734ae VADDPD %XMM7,%XMM8,%XMM7 |
0x5734b2 VPERMILPD $0x1,%XMM7,%XMM8 |
0x5734b8 VADDSD %XMM7,%XMM8,%XMM7 |
0x5734bc VADDSD %XMM7,%XMM6,%XMM6 |
0x5734c0 LEA 0x2(%R8,%R15,1),%RAX |
0x5734c5 VMOVSD %XMM6,(%RDX,%RAX,8) |
0x5734ca VMOVSD 0x18(%RSI,%R10,8),%XMM6 |
0x5734d1 MOV 0x18(%RBX,%R10,8),%R11 |
0x5734d6 MOV 0x20(%RBX,%R10,8),%R13 |
0x5734db SUB %R11,%R13 |
0x5734de JLE 573610 |
0x5734e4 MOV %R13,%R12 |
0x5734e7 AND $-0x8,%R12 |
0x5734eb JE 57356e |
0x5734f1 MOV -0x30(%RBP),%RAX |
0x5734f5 LEA -0x1(%R12),%R14 |
0x5734fa VXORPD %XMM7,%XMM7,%XMM7 |
0x5734fe LEA (%RAX,%R11,8),%RSI |
0x573502 MOV 0x10(%RBP),%RAX |
0x573506 LEA (%RAX,%R11,8),%RDX |
0x57350a XOR %EAX,%EAX |
0x57350c NOPL (%RAX) |
(6439) 0x573510 VMOVUPD (%RDX,%RAX,8),%ZMM8 |
(6439) 0x573517 KXNORW %K0,%K0,%K1 |
(6439) 0x57351b VXORPD %XMM9,%XMM9,%XMM9 |
(6439) 0x573520 VGATHERQPD (%RCX,%ZMM8,8),%ZMM9{%K1} |
(6439) 0x573527 VFNMADD231PD (%RSI,%RAX,8),%ZMM9,%ZMM7 |
(6439) 0x57352e ADD $0x8,%RAX |
(6439) 0x573532 CMP %R14,%RAX |
(6439) 0x573535 JBE 573510 |
0x573537 VEXTRACTF64X4 $0x1,%ZMM7,%YMM8 |
0x57353e MOV 0x30(%RBP),%RDX |
0x573542 MOV 0x28(%RBP),%RSI |
0x573546 VADDPD %ZMM8,%ZMM7,%ZMM7 |
0x57354c VEXTRACTF128 $0x1,%YMM7,%XMM8 |
0x573552 VADDPD %XMM7,%XMM8,%XMM7 |
0x573556 VPERMILPD $0x1,%XMM7,%XMM8 |
0x57355c VADDSD %XMM7,%XMM8,%XMM7 |
0x573560 VADDSD %XMM7,%XMM6,%XMM6 |
0x573564 CMP %R12,%R13 |
0x573567 JNE 573579 |
0x573569 JMP 573610 |
0x57356e VPBROADCASTQ %R13,%ZMM7 |
0x573574 XOR %R12D,%R12D |
0x573577 JMP 57357f |
0x573579 VPBROADCASTQ %R13,%ZMM7 |
0x57357f MOV 0x10(%RBP),%RAX |
0x573583 VPBROADCASTQ %R12,%ZMM8 |
0x573589 ADD %R12,%R11 |
0x57358c VPSUBQ %ZMM8,%ZMM7,%ZMM7 |
0x573592 VPXOR %XMM8,%XMM8,%XMM8 |
0x573597 VPCMPNLEUQ %ZMM4,%ZMM7,%K1 |
0x57359e KMOVQ %K1,%K2 |
0x5735a3 VMOVDQU64 (%RAX,%R11,8),%ZMM7{%K1}{z} |
0x5735aa MOV -0x30(%RBP),%RAX |
0x5735ae VMOVUPD (%RAX,%R11,8),%ZMM9{%K1}{z} |
0x5735b5 VMOVDQA64 %ZMM7,%ZMM2{%K1} |
0x5735bb VPSLLQ $0x3,%ZMM2,%ZMM7 |
0x5735c2 VPADDQ %ZMM7,%ZMM3,%ZMM7 |
0x5735c8 VGATHERQPD (,%ZMM7,1),%ZMM8{%K2} |
0x5735d3 VMOVAPD %ZMM9,%ZMM0{%K1} |
0x5735d9 VMOVAPD %ZMM8,%ZMM1{%K1} |
0x5735df VXORPD %ZMM5,%ZMM1,%ZMM8 |
0x5735e5 VMULPD %ZMM8,%ZMM9,%ZMM7{%K1}{z} |
0x5735eb VEXTRACTF64X4 $0x1,%ZMM7,%YMM8 |
0x5735f2 VADDPD %ZMM8,%ZMM7,%ZMM7 |
0x5735f8 VEXTRACTF128 $0x1,%YMM7,%XMM8 |
0x5735fe VADDPD %XMM7,%XMM8,%XMM7 |
0x573602 VPERMILPD $0x1,%XMM7,%XMM8 |
0x573608 VADDSD %XMM7,%XMM8,%XMM7 |
0x57360c VADDSD %XMM7,%XMM6,%XMM6 |
0x573610 LEA 0x3(%R8,%R15,1),%RAX |
0x573615 VMOVSD %XMM6,(%RDX,%RAX,8) |
0x57361a VMOVSD 0x20(%RSI,%R10,8),%XMM6 |
0x573621 MOV 0x20(%RBX,%R10,8),%R11 |
0x573626 MOV 0x28(%RBX,%R10,8),%R13 |
0x57362b SUB %R11,%R13 |
0x57362e JLE 573760 |
0x573634 MOV %R13,%R12 |
0x573637 AND $-0x8,%R12 |
0x57363b JE 5736be |
0x573641 MOV -0x30(%RBP),%RAX |
0x573645 LEA -0x1(%R12),%R14 |
0x57364a VXORPD %XMM7,%XMM7,%XMM7 |
0x57364e LEA (%RAX,%R11,8),%RSI |
0x573652 MOV 0x10(%RBP),%RAX |
0x573656 LEA (%RAX,%R11,8),%RDX |
0x57365a XOR %EAX,%EAX |
0x57365c NOPL (%RAX) |
(6438) 0x573660 VMOVUPD (%RDX,%RAX,8),%ZMM8 |
(6438) 0x573667 KXNORW %K0,%K0,%K1 |
(6438) 0x57366b VXORPD %XMM9,%XMM9,%XMM9 |
(6438) 0x573670 VGATHERQPD (%RCX,%ZMM8,8),%ZMM9{%K1} |
(6438) 0x573677 VFNMADD231PD (%RSI,%RAX,8),%ZMM9,%ZMM7 |
(6438) 0x57367e ADD $0x8,%RAX |
(6438) 0x573682 CMP %R14,%RAX |
(6438) 0x573685 JBE 573660 |
0x573687 VEXTRACTF64X4 $0x1,%ZMM7,%YMM8 |
0x57368e MOV 0x30(%RBP),%RDX |
0x573692 MOV 0x28(%RBP),%RSI |
0x573696 VADDPD %ZMM8,%ZMM7,%ZMM7 |
0x57369c VEXTRACTF128 $0x1,%YMM7,%XMM8 |
0x5736a2 VADDPD %XMM7,%XMM8,%XMM7 |
0x5736a6 VPERMILPD $0x1,%XMM7,%XMM8 |
0x5736ac VADDSD %XMM7,%XMM8,%XMM7 |
0x5736b0 VADDSD %XMM7,%XMM6,%XMM6 |
0x5736b4 CMP %R12,%R13 |
0x5736b7 JNE 5736c9 |
0x5736b9 JMP 573760 |
0x5736be VPBROADCASTQ %R13,%ZMM7 |
0x5736c4 XOR %R12D,%R12D |
0x5736c7 JMP 5736cf |
0x5736c9 VPBROADCASTQ %R13,%ZMM7 |
0x5736cf MOV 0x10(%RBP),%RAX |
0x5736d3 VPBROADCASTQ %R12,%ZMM8 |
0x5736d9 ADD %R12,%R11 |
0x5736dc VPSUBQ %ZMM8,%ZMM7,%ZMM7 |
0x5736e2 VPXOR %XMM8,%XMM8,%XMM8 |
0x5736e7 VPCMPNLEUQ %ZMM4,%ZMM7,%K1 |
0x5736ee KMOVQ %K1,%K2 |
0x5736f3 VMOVDQU64 (%RAX,%R11,8),%ZMM7{%K1}{z} |
0x5736fa MOV -0x30(%RBP),%RAX |
0x5736fe VMOVUPD (%RAX,%R11,8),%ZMM9{%K1}{z} |
0x573705 VMOVDQA64 %ZMM7,%ZMM2{%K1} |
0x57370b VPSLLQ $0x3,%ZMM2,%ZMM7 |
0x573712 VPADDQ %ZMM7,%ZMM3,%ZMM7 |
0x573718 VGATHERQPD (,%ZMM7,1),%ZMM8{%K2} |
0x573723 VMOVAPD %ZMM9,%ZMM0{%K1} |
0x573729 VMOVAPD %ZMM8,%ZMM1{%K1} |
0x57372f VXORPD %ZMM5,%ZMM1,%ZMM8 |
0x573735 VMULPD %ZMM8,%ZMM9,%ZMM7{%K1}{z} |
0x57373b VEXTRACTF64X4 $0x1,%ZMM7,%YMM8 |
0x573742 VADDPD %ZMM8,%ZMM7,%ZMM7 |
0x573748 VEXTRACTF128 $0x1,%YMM7,%XMM8 |
0x57374e VADDPD %XMM7,%XMM8,%XMM7 |
0x573752 VPERMILPD $0x1,%XMM7,%XMM8 |
0x573758 VADDSD %XMM7,%XMM8,%XMM7 |
0x57375c VADDSD %XMM7,%XMM6,%XMM6 |
0x573760 LEA 0x4(%R8,%R15,1),%RAX |
0x573765 VMOVSD %XMM6,(%RDX,%RAX,8) |
0x57376a VMOVSD 0x28(%RSI,%R10,8),%XMM6 |
0x573771 MOV 0x28(%RBX,%R10,8),%R11 |
0x573776 MOV 0x30(%RBX,%R10,8),%R13 |
0x57377b SUB %R11,%R13 |
0x57377e JLE 5738b0 |
0x573784 MOV %R13,%R12 |
0x573787 AND $-0x8,%R12 |
0x57378b JE 57380e |
0x573791 MOV -0x30(%RBP),%RAX |
0x573795 LEA -0x1(%R12),%R14 |
0x57379a VXORPD %XMM7,%XMM7,%XMM7 |
0x57379e LEA (%RAX,%R11,8),%RSI |
0x5737a2 MOV 0x10(%RBP),%RAX |
0x5737a6 LEA (%RAX,%R11,8),%RDX |
0x5737aa XOR %EAX,%EAX |
0x5737ac NOPL (%RAX) |
(6437) 0x5737b0 VMOVUPD (%RDX,%RAX,8),%ZMM8 |
(6437) 0x5737b7 KXNORW %K0,%K0,%K1 |
(6437) 0x5737bb VXORPD %XMM9,%XMM9,%XMM9 |
(6437) 0x5737c0 VGATHERQPD (%RCX,%ZMM8,8),%ZMM9{%K1} |
(6437) 0x5737c7 VFNMADD231PD (%RSI,%RAX,8),%ZMM9,%ZMM7 |
(6437) 0x5737ce ADD $0x8,%RAX |
(6437) 0x5737d2 CMP %R14,%RAX |
(6437) 0x5737d5 JBE 5737b0 |
0x5737d7 VEXTRACTF64X4 $0x1,%ZMM7,%YMM8 |
0x5737de MOV 0x30(%RBP),%RDX |
0x5737e2 MOV 0x28(%RBP),%RSI |
0x5737e6 VADDPD %ZMM8,%ZMM7,%ZMM7 |
0x5737ec VEXTRACTF128 $0x1,%YMM7,%XMM8 |
0x5737f2 VADDPD %XMM7,%XMM8,%XMM7 |
0x5737f6 VPERMILPD $0x1,%XMM7,%XMM8 |
0x5737fc VADDSD %XMM7,%XMM8,%XMM7 |
0x573800 VADDSD %XMM7,%XMM6,%XMM6 |
0x573804 CMP %R12,%R13 |
0x573807 JNE 573819 |
0x573809 JMP 5738b0 |
0x57380e VPBROADCASTQ %R13,%ZMM7 |
0x573814 XOR %R12D,%R12D |
0x573817 JMP 57381f |
0x573819 VPBROADCASTQ %R13,%ZMM7 |
0x57381f MOV 0x10(%RBP),%RAX |
0x573823 VPBROADCASTQ %R12,%ZMM8 |
0x573829 ADD %R12,%R11 |
0x57382c VPSUBQ %ZMM8,%ZMM7,%ZMM7 |
0x573832 VPXOR %XMM8,%XMM8,%XMM8 |
0x573837 VPCMPNLEUQ %ZMM4,%ZMM7,%K1 |
0x57383e KMOVQ %K1,%K2 |
0x573843 VMOVDQU64 (%RAX,%R11,8),%ZMM7{%K1}{z} |
0x57384a MOV -0x30(%RBP),%RAX |
0x57384e VMOVUPD (%RAX,%R11,8),%ZMM9{%K1}{z} |
0x573855 VMOVDQA64 %ZMM7,%ZMM2{%K1} |
0x57385b VPSLLQ $0x3,%ZMM2,%ZMM7 |
0x573862 VPADDQ %ZMM7,%ZMM3,%ZMM7 |
0x573868 VGATHERQPD (,%ZMM7,1),%ZMM8{%K2} |
0x573873 VMOVAPD %ZMM9,%ZMM0{%K1} |
0x573879 VMOVAPD %ZMM8,%ZMM1{%K1} |
0x57387f VXORPD %ZMM5,%ZMM1,%ZMM8 |
0x573885 VMULPD %ZMM8,%ZMM9,%ZMM7{%K1}{z} |
0x57388b VEXTRACTF64X4 $0x1,%ZMM7,%YMM8 |
0x573892 VADDPD %ZMM8,%ZMM7,%ZMM7 |
0x573898 VEXTRACTF128 $0x1,%YMM7,%XMM8 |
0x57389e VADDPD %XMM7,%XMM8,%XMM7 |
0x5738a2 VPERMILPD $0x1,%XMM7,%XMM8 |
0x5738a8 VADDSD %XMM7,%XMM8,%XMM7 |
0x5738ac VADDSD %XMM7,%XMM6,%XMM6 |
0x5738b0 LEA 0x5(%R8,%R15,1),%RAX |
0x5738b5 VMOVSD %XMM6,(%RDX,%RAX,8) |
0x5738ba VMOVSD 0x30(%RSI,%R10,8),%XMM6 |
0x5738c1 MOV 0x30(%RBX,%R10,8),%R11 |
0x5738c6 MOV 0x38(%RBX,%R10,8),%R13 |
0x5738cb SUB %R11,%R13 |
0x5738ce JLE 573a00 |
0x5738d4 MOV %R13,%R12 |
0x5738d7 AND $-0x8,%R12 |
0x5738db JE 57395e |
0x5738e1 MOV -0x30(%RBP),%RAX |
0x5738e5 LEA -0x1(%R12),%R14 |
0x5738ea VXORPD %XMM7,%XMM7,%XMM7 |
0x5738ee LEA (%RAX,%R11,8),%RSI |
0x5738f2 MOV 0x10(%RBP),%RAX |
0x5738f6 LEA (%RAX,%R11,8),%RDX |
0x5738fa XOR %EAX,%EAX |
0x5738fc NOPL (%RAX) |
(6436) 0x573900 VMOVUPD (%RDX,%RAX,8),%ZMM8 |
(6436) 0x573907 KXNORW %K0,%K0,%K1 |
(6436) 0x57390b VXORPD %XMM9,%XMM9,%XMM9 |
(6436) 0x573910 VGATHERQPD (%RCX,%ZMM8,8),%ZMM9{%K1} |
(6436) 0x573917 VFNMADD231PD (%RSI,%RAX,8),%ZMM9,%ZMM7 |
(6436) 0x57391e ADD $0x8,%RAX |
(6436) 0x573922 CMP %R14,%RAX |
(6436) 0x573925 JBE 573900 |
0x573927 VEXTRACTF64X4 $0x1,%ZMM7,%YMM8 |
0x57392e MOV 0x30(%RBP),%RDX |
0x573932 MOV 0x28(%RBP),%RSI |
0x573936 VADDPD %ZMM8,%ZMM7,%ZMM7 |
0x57393c VEXTRACTF128 $0x1,%YMM7,%XMM8 |
0x573942 VADDPD %XMM7,%XMM8,%XMM7 |
0x573946 VPERMILPD $0x1,%XMM7,%XMM8 |
0x57394c VADDSD %XMM7,%XMM8,%XMM7 |
0x573950 VADDSD %XMM7,%XMM6,%XMM6 |
0x573954 CMP %R12,%R13 |
0x573957 JNE 573969 |
0x573959 JMP 573a00 |
0x57395e VPBROADCASTQ %R13,%ZMM7 |
0x573964 XOR %R12D,%R12D |
0x573967 JMP 57396f |
0x573969 VPBROADCASTQ %R13,%ZMM7 |
0x57396f MOV 0x10(%RBP),%RAX |
0x573973 VPBROADCASTQ %R12,%ZMM8 |
0x573979 ADD %R12,%R11 |
0x57397c VPSUBQ %ZMM8,%ZMM7,%ZMM7 |
0x573982 VPXOR %XMM8,%XMM8,%XMM8 |
0x573987 VPCMPNLEUQ %ZMM4,%ZMM7,%K1 |
0x57398e KMOVQ %K1,%K2 |
0x573993 VMOVDQU64 (%RAX,%R11,8),%ZMM7{%K1}{z} |
0x57399a MOV -0x30(%RBP),%RAX |
0x57399e VMOVUPD (%RAX,%R11,8),%ZMM9{%K1}{z} |
0x5739a5 VMOVDQA64 %ZMM7,%ZMM2{%K1} |
0x5739ab VPSLLQ $0x3,%ZMM2,%ZMM7 |
0x5739b2 VPADDQ %ZMM7,%ZMM3,%ZMM7 |
0x5739b8 VGATHERQPD (,%ZMM7,1),%ZMM8{%K2} |
0x5739c3 VMOVAPD %ZMM9,%ZMM0{%K1} |
0x5739c9 VMOVAPD %ZMM8,%ZMM1{%K1} |
0x5739cf VXORPD %ZMM5,%ZMM1,%ZMM8 |
0x5739d5 VMULPD %ZMM8,%ZMM9,%ZMM7{%K1}{z} |
0x5739db VEXTRACTF64X4 $0x1,%ZMM7,%YMM8 |
0x5739e2 VADDPD %ZMM8,%ZMM7,%ZMM7 |
0x5739e8 VEXTRACTF128 $0x1,%YMM7,%XMM8 |
0x5739ee VADDPD %XMM7,%XMM8,%XMM7 |
0x5739f2 VPERMILPD $0x1,%XMM7,%XMM8 |
0x5739f8 VADDSD %XMM7,%XMM8,%XMM7 |
0x5739fc VADDSD %XMM7,%XMM6,%XMM6 |
0x573a00 LEA 0x6(%R8,%R15,1),%RAX |
0x573a05 VMOVSD %XMM6,(%RDX,%RAX,8) |
0x573a0a VMOVSD 0x38(%RSI,%R10,8),%XMM6 |
0x573a11 MOV 0x38(%RBX,%R10,8),%R11 |
0x573a16 MOV 0x40(%RBX,%R10,8),%R12 |
0x573a1b SUB %R11,%R12 |
0x573a1e JLE 5730bd |
0x573a24 MOV %R12,%R10 |
0x573a27 AND $-0x8,%R10 |
0x573a2b JE 573023 |
0x573a31 MOV -0x30(%RBP),%RAX |
0x573a35 LEA -0x1(%R10),%R14 |
0x573a39 VXORPD %XMM7,%XMM7,%XMM7 |
0x573a3d LEA (%RAX,%R11,8),%RSI |
0x573a41 MOV 0x10(%RBP),%RAX |
0x573a45 LEA (%RAX,%R11,8),%RDX |
0x573a49 XOR %EAX,%EAX |
0x573a4b NOPL (%RAX,%RAX,1) |
(6435) 0x573a50 VMOVUPD (%RDX,%RAX,8),%ZMM8 |
(6435) 0x573a57 KXNORW %K0,%K0,%K1 |
(6435) 0x573a5b VXORPD %XMM9,%XMM9,%XMM9 |
(6435) 0x573a60 VGATHERQPD (%RCX,%ZMM8,8),%ZMM9{%K1} |
(6435) 0x573a67 VFNMADD231PD (%RSI,%RAX,8),%ZMM9,%ZMM7 |
(6435) 0x573a6e ADD $0x8,%RAX |
(6435) 0x573a72 CMP %R14,%RAX |
(6435) 0x573a75 JBE 573a50 |
0x573a77 VEXTRACTF64X4 $0x1,%ZMM7,%YMM8 |
0x573a7e MOV 0x30(%RBP),%RDX |
0x573a82 VADDPD %ZMM8,%ZMM7,%ZMM7 |
0x573a88 VEXTRACTF128 $0x1,%YMM7,%XMM8 |
0x573a8e VADDPD %XMM7,%XMM8,%XMM7 |
0x573a92 VPERMILPD $0x1,%XMM7,%XMM8 |
0x573a98 VADDSD %XMM7,%XMM8,%XMM7 |
0x573a9c VADDSD %XMM7,%XMM6,%XMM6 |
0x573aa0 CMP %R10,%R12 |
0x573aa3 JNE 573aae |
0x573aa5 MOV 0x28(%RBP),%RSI |
0x573aa9 JMP 5730bd |
0x573aae MOV 0x28(%RBP),%RSI |
0x573ab2 VPBROADCASTQ %R12,%ZMM7 |
0x573ab8 JMP 57302c |
/home/eoseret/qaas_runs_CPU_9468/172-019-1763/intel/AMG/build/AMG/AMG/seq_mv/csr_matvec.c: 248 - 314 |
-------------------------------------------------------------------------------- |
248: hypre_assert(iBegin <= iEnd); |
[...] |
307: for (i = iBegin; i < iEnd; i++) |
308: { |
309: tempx = b_data[i]; |
310: for (jj = A_i[i]; jj < A_i[i+1]; jj++) |
311: { |
312: tempx -= A_data[jj] * x_data[A_j[jj]]; |
313: } |
314: y_data[i] = tempx; |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►99.98+ | __kmp_invoke_microtask | libiomp5.so | |
○ | __kmp_invoke_task_func | libiomp5.so |
Path / |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.24 |
CQA speedup if FP arith vectorized | 1.09 |
CQA speedup if fully vectorized | 1.36 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.57 |
Bottlenecks | micro-operation queue, |
Function | hypre_CSRMatrixMatvecOutOfPlace.extracted |
Source | csr_matvec.c:248-248,csr_matvec.c:307-314 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 144.67 |
CQA cycles if no scalar integer | 116.83 |
CQA cycles if FP arith vectorized | 132.54 |
CQA cycles if fully vectorized | 106.38 |
Front-end cycles | 144.67 |
DIV/SQRT cycles | 21.75 |
P0 cycles | 21.75 |
P1 cycles | 21.50 |
P2 cycles | 21.50 |
P3 cycles | 20.50 |
P4 cycles | 40.33 |
P5 cycles | 40.33 |
P6 cycles | 40.33 |
P7 cycles | 90.00 |
P8 cycles | 82.00 |
P9 cycles | 87.92 |
P10 cycles | 92.08 |
P11 cycles | 44.00 |
P12 cycles | 44.00 |
P13 cycles | 0.00 |
Inter-iter dependencies cycles | NA |
FE+BE cycles (UFS) | NA |
Stall cycles (UFS) | NA |
Nb insns | 477.00 |
Nb uops | 868.00 |
Nb loads | 98.00 |
Nb stores | 8.00 |
Nb stack references | 4.00 |
FLOP/cycle | 1.77 |
Nb FLOP add-sub | 192.00 |
Nb FLOP mul | 64.00 |
Nb FLOP fma | 0.00 |
Nb FLOP div | 0.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 15.15 |
Bytes prefetched | 0.00 |
Bytes loaded | 2128.00 |
Bytes stored | 64.00 |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | 67.13 |
Vectorization ratio load | 61.54 |
Vectorization ratio store | 0.00 |
Vectorization ratio mul | 100.00 |
Vectorization ratio add_sub | 55.17 |
Vectorization ratio fma | NA |
Vectorization ratio div_sqrt | NA |
Vectorization ratio other | 78.95 |
Vector-efficiency ratio all | 51.66 |
Vector-efficiency ratio load | 66.35 |
Vector-efficiency ratio store | 12.50 |
Vector-efficiency ratio mul | 100.00 |
Vector-efficiency ratio add_sub | 46.98 |
Vector-efficiency ratio fma | NA |
Vector-efficiency ratio div_sqrt | NA |
Vector-efficiency ratio other | 52.63 |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.24 |
CQA speedup if FP arith vectorized | 1.09 |
CQA speedup if fully vectorized | 1.36 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.57 |
Bottlenecks | micro-operation queue, |
Function | hypre_CSRMatrixMatvecOutOfPlace.extracted |
Source | csr_matvec.c:248-248,csr_matvec.c:307-314 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 144.67 |
CQA cycles if no scalar integer | 116.83 |
CQA cycles if FP arith vectorized | 132.54 |
CQA cycles if fully vectorized | 106.38 |
Front-end cycles | 144.67 |
DIV/SQRT cycles | 21.75 |
P0 cycles | 21.75 |
P1 cycles | 21.50 |
P2 cycles | 21.50 |
P3 cycles | 20.50 |
P4 cycles | 40.33 |
P5 cycles | 40.33 |
P6 cycles | 40.33 |
P7 cycles | 90.00 |
P8 cycles | 82.00 |
P9 cycles | 87.92 |
P10 cycles | 92.08 |
P11 cycles | 44.00 |
P12 cycles | 44.00 |
P13 cycles | 0.00 |
Inter-iter dependencies cycles | NA |
FE+BE cycles (UFS) | NA |
Stall cycles (UFS) | NA |
Nb insns | 477.00 |
Nb uops | 868.00 |
Nb loads | 98.00 |
Nb stores | 8.00 |
Nb stack references | 4.00 |
FLOP/cycle | 1.77 |
Nb FLOP add-sub | 192.00 |
Nb FLOP mul | 64.00 |
Nb FLOP fma | 0.00 |
Nb FLOP div | 0.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 15.15 |
Bytes prefetched | 0.00 |
Bytes loaded | 2128.00 |
Bytes stored | 64.00 |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | 67.13 |
Vectorization ratio load | 61.54 |
Vectorization ratio store | 0.00 |
Vectorization ratio mul | 100.00 |
Vectorization ratio add_sub | 55.17 |
Vectorization ratio fma | NA |
Vectorization ratio div_sqrt | NA |
Vectorization ratio other | 78.95 |
Vector-efficiency ratio all | 51.66 |
Vector-efficiency ratio load | 66.35 |
Vector-efficiency ratio store | 12.50 |
Vector-efficiency ratio mul | 100.00 |
Vector-efficiency ratio add_sub | 46.98 |
Vector-efficiency ratio fma | NA |
Vector-efficiency ratio div_sqrt | NA |
Vector-efficiency ratio other | 52.63 |
Path / |
Function | hypre_CSRMatrixMatvecOutOfPlace.extracted |
Source file and lines | csr_matvec.c:248-314 |
Module | exec |
nb instructions | 477 |
nb uops | 868 |
loop length | 2402 |
used x86 registers | 13 |
used mmx registers | 0 |
used xmm registers | 3 |
used ymm registers | 2 |
used zmm registers | 9 |
nb stack references | 4 |
ADD-SUB / MUL ratio | 8.00 |
micro-operation queue | 144.67 cycles |
front end | 144.67 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 21.75 | 21.75 | 21.50 | 21.50 | 20.50 | 35.00 | 35.00 | 35.00 | 82.00 | 82.00 | 81.92 | 82.08 | 44.00 | 44.00 |
cycles | 21.75 | 21.75 | 21.50 | 21.50 | 20.50 | 40.33 | 40.33 | 40.33 | 90.00 | 82.00 | 87.92 | 92.08 | 44.00 | 44.00 |
Cycles executing div or sqrt instructions | NA |
Front-end | 144.67 |
Dispatch | 92.08 |
Overall L1 | 144.67 |
all | 54% |
load | 53% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 69% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 50% |
all | 73% |
load | 66% |
store | 0% |
mul | 100% |
add-sub | 50% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 100% |
all | 67% |
load | 61% |
store | 0% |
mul | 100% |
add-sub | 55% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 78% |
all | 54% |
load | 59% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 73% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 46% |
all | 50% |
load | 70% |
store | 12% |
mul | 100% |
add-sub | 37% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 56% |
all | 51% |
load | 66% |
store | 12% |
mul | 100% |
add-sub | 46% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 52% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput | Vectorization |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
VPBROADCASTQ %R12,%ZMM7 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | scal (12.5%) |
XOR %R10D,%R10D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
MOV 0x10(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VPBROADCASTQ %R10,%ZMM8 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | scal (12.5%) |
ADD %R10,%R11 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VPSUBQ %ZMM8,%ZMM7,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (100.0%) |
VPXOR %XMM8,%XMM8,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | vect (25.0%) |
VPCMPNLEUQ %ZMM4,%ZMM7,%K1 | vect (100.0%) | |||||||||||||||||
KMOVQ %K1,%K2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 1 | 1 | N/A |
VMOVDQU64 (%RAX,%R11,8),%ZMM7{%K1}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
MOV -0x30(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VMOVUPD (%RAX,%R11,8),%ZMM9{%K1}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMOVDQA64 %ZMM7,%ZMM2{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VPSLLQ $0x3,%ZMM2,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 1 | vect (100.0%) |
VPADDQ %ZMM7,%ZMM3,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (100.0%) |
VGATHERQPD (,%ZMM7,1),%ZMM8{%K2} | 48 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.75 | 4.58 | 5.08 | 3.58 | 5 | 5 | 0-16 | 8.94 | vect (100.0%) |
VMOVAPD %ZMM9,%ZMM0{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VMOVAPD %ZMM8,%ZMM1{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VXORPD %ZMM5,%ZMM1,%ZMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (100.0%) |
VMULPD %ZMM8,%ZMM9,%ZMM7{%K1}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VEXTRACTF64X4 $0x1,%ZMM7,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (50.0%) |
VADDPD %ZMM8,%ZMM7,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VEXTRACTF128 $0x1,%YMM7,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | vect (25.0%) |
VADDPD %XMM7,%XMM8,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VPERMILPD $0x1,%XMM7,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | vect (25.0%) |
VADDSD %XMM7,%XMM8,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
VADDSD %XMM7,%XMM6,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
LEA 0x7(%R8,%R15,1),%RAX | 2 | 0 | 0 | 0.50 | 0.50 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | N/A |
ADD $0x8,%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VMOVSD %XMM6,(%RDX,%RAX,8) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 | scal (12.5%) |
CMP %R9,%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JE 56fb33 <hypre_CSRMatrixMatvecOutOfPlace.extracted+0x1273> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
LEA (%R15,%R8,1),%R10 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VMOVSD (%RSI,%R10,8),%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
MOV (%RBX,%R10,8),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x8(%RBX,%R10,8),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
SUB %R11,%R13 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JLE 573224 <hypre_CSRMatrixMatvecOutOfPlace.extracted+0x4964> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV %R13,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
AND $-0x8,%R12 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
JE 57317e <hypre_CSRMatrixMatvecOutOfPlace.extracted+0x48be> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV -0x30(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
LEA -0x1(%R12),%R14 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VXORPD %XMM7,%XMM7,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
LEA (%RAX,%R11,8),%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV 0x10(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
LEA (%RAX,%R11,8),%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
NOPL (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
VEXTRACTF64X4 $0x1,%ZMM7,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (50.0%) |
MOV 0x28(%RBP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VADDPD %ZMM8,%ZMM7,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VEXTRACTF128 $0x1,%YMM7,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | vect (25.0%) |
VADDPD %XMM7,%XMM8,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VPERMILPD $0x1,%XMM7,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | vect (25.0%) |
VADDSD %XMM7,%XMM8,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
VADDSD %XMM7,%XMM6,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
CMP %R12,%R13 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JNE 573189 <hypre_CSRMatrixMatvecOutOfPlace.extracted+0x48c9> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV 0x30(%RBP),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
JMP 573224 <hypre_CSRMatrixMatvecOutOfPlace.extracted+0x4964> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
VPBROADCASTQ %R13,%ZMM7 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | scal (12.5%) |
XOR %R12D,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
JMP 573193 <hypre_CSRMatrixMatvecOutOfPlace.extracted+0x48d3> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
MOV 0x30(%RBP),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VPBROADCASTQ %R13,%ZMM7 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | scal (12.5%) |
MOV 0x10(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VPBROADCASTQ %R12,%ZMM8 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | scal (12.5%) |
ADD %R12,%R11 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VPSUBQ %ZMM8,%ZMM7,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (100.0%) |
VPXOR %XMM8,%XMM8,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | vect (25.0%) |
VPCMPNLEUQ %ZMM4,%ZMM7,%K1 | vect (100.0%) | |||||||||||||||||
KMOVQ %K1,%K2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 1 | 1 | N/A |
VMOVDQU64 (%RAX,%R11,8),%ZMM7{%K1}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
MOV -0x30(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VMOVUPD (%RAX,%R11,8),%ZMM9{%K1}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMOVDQA64 %ZMM7,%ZMM2{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VPSLLQ $0x3,%ZMM2,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 1 | vect (100.0%) |
VPADDQ %ZMM7,%ZMM3,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (100.0%) |
VGATHERQPD (,%ZMM7,1),%ZMM8{%K2} | 48 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.75 | 4.58 | 5.08 | 3.58 | 5 | 5 | 0-16 | 8.94 | vect (100.0%) |
VMOVAPD %ZMM9,%ZMM0{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VMOVAPD %ZMM8,%ZMM1{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VXORPD %ZMM5,%ZMM1,%ZMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (100.0%) |
VMULPD %ZMM8,%ZMM9,%ZMM7{%K1}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VEXTRACTF64X4 $0x1,%ZMM7,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (50.0%) |
VADDPD %ZMM8,%ZMM7,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VEXTRACTF128 $0x1,%YMM7,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | vect (25.0%) |
VADDPD %XMM7,%XMM8,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VPERMILPD $0x1,%XMM7,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | vect (25.0%) |
VADDSD %XMM7,%XMM8,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
VADDSD %XMM7,%XMM6,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
VMOVSD %XMM6,(%RDX,%R10,8) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 | scal (12.5%) |
VMOVSD 0x8(%RSI,%R10,8),%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
MOV 0x8(%RBX,%R10,8),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x10(%RBX,%R10,8),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
SUB %R11,%R13 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JLE 573370 <hypre_CSRMatrixMatvecOutOfPlace.extracted+0x4ab0> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV %R13,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
AND $-0x8,%R12 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
JE 5732ce <hypre_CSRMatrixMatvecOutOfPlace.extracted+0x4a0e> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV -0x30(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
LEA -0x1(%R12),%R14 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VXORPD %XMM7,%XMM7,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
LEA (%RAX,%R11,8),%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV 0x10(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
LEA (%RAX,%R11,8),%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
VEXTRACTF64X4 $0x1,%ZMM7,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (50.0%) |
MOV 0x30(%RBP),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x28(%RBP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VADDPD %ZMM8,%ZMM7,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VEXTRACTF128 $0x1,%YMM7,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | vect (25.0%) |
VADDPD %XMM7,%XMM8,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VPERMILPD $0x1,%XMM7,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | vect (25.0%) |
VADDSD %XMM7,%XMM8,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
VADDSD %XMM7,%XMM6,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
CMP %R12,%R13 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JNE 5732d9 <hypre_CSRMatrixMatvecOutOfPlace.extracted+0x4a19> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
JMP 573370 <hypre_CSRMatrixMatvecOutOfPlace.extracted+0x4ab0> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
VPBROADCASTQ %R13,%ZMM7 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | scal (12.5%) |
XOR %R12D,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
JMP 5732df <hypre_CSRMatrixMatvecOutOfPlace.extracted+0x4a1f> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
VPBROADCASTQ %R13,%ZMM7 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | scal (12.5%) |
MOV 0x10(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VPBROADCASTQ %R12,%ZMM8 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | scal (12.5%) |
ADD %R12,%R11 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VPSUBQ %ZMM8,%ZMM7,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (100.0%) |
VPXOR %XMM8,%XMM8,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | vect (25.0%) |
VPCMPNLEUQ %ZMM4,%ZMM7,%K1 | vect (100.0%) | |||||||||||||||||
KMOVQ %K1,%K2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 1 | 1 | N/A |
VMOVDQU64 (%RAX,%R11,8),%ZMM7{%K1}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
MOV -0x30(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VMOVUPD (%RAX,%R11,8),%ZMM9{%K1}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMOVDQA64 %ZMM7,%ZMM2{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VPSLLQ $0x3,%ZMM2,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 1 | vect (100.0%) |
VPADDQ %ZMM7,%ZMM3,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (100.0%) |
VGATHERQPD (,%ZMM7,1),%ZMM8{%K2} | 48 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.75 | 4.58 | 5.08 | 3.58 | 5 | 5 | 0-16 | 8.94 | vect (100.0%) |
VMOVAPD %ZMM9,%ZMM0{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VMOVAPD %ZMM8,%ZMM1{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VXORPD %ZMM5,%ZMM1,%ZMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (100.0%) |
VMULPD %ZMM8,%ZMM9,%ZMM7{%K1}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VEXTRACTF64X4 $0x1,%ZMM7,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (50.0%) |
VADDPD %ZMM8,%ZMM7,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VEXTRACTF128 $0x1,%YMM7,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | vect (25.0%) |
VADDPD %XMM7,%XMM8,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VPERMILPD $0x1,%XMM7,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | vect (25.0%) |
VADDSD %XMM7,%XMM8,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
VADDSD %XMM7,%XMM6,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
LEA 0x1(%R8,%R15,1),%RAX | 2 | 0 | 0 | 0.50 | 0.50 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | N/A |
VMOVSD %XMM6,(%RDX,%RAX,8) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 | scal (12.5%) |
VMOVSD 0x10(%RSI,%R10,8),%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
MOV 0x10(%RBX,%R10,8),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x18(%RBX,%R10,8),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
SUB %R11,%R13 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JLE 5734c0 <hypre_CSRMatrixMatvecOutOfPlace.extracted+0x4c00> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV %R13,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
AND $-0x8,%R12 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
JE 57341e <hypre_CSRMatrixMatvecOutOfPlace.extracted+0x4b5e> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV -0x30(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
LEA -0x1(%R12),%R14 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VXORPD %XMM7,%XMM7,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
LEA (%RAX,%R11,8),%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV 0x10(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
LEA (%RAX,%R11,8),%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
VEXTRACTF64X4 $0x1,%ZMM7,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (50.0%) |
MOV 0x30(%RBP),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x28(%RBP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VADDPD %ZMM8,%ZMM7,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VEXTRACTF128 $0x1,%YMM7,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | vect (25.0%) |
VADDPD %XMM7,%XMM8,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VPERMILPD $0x1,%XMM7,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | vect (25.0%) |
VADDSD %XMM7,%XMM8,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
VADDSD %XMM7,%XMM6,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
CMP %R12,%R13 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JNE 573429 <hypre_CSRMatrixMatvecOutOfPlace.extracted+0x4b69> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
JMP 5734c0 <hypre_CSRMatrixMatvecOutOfPlace.extracted+0x4c00> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
VPBROADCASTQ %R13,%ZMM7 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | scal (12.5%) |
XOR %R12D,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
JMP 57342f <hypre_CSRMatrixMatvecOutOfPlace.extracted+0x4b6f> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
VPBROADCASTQ %R13,%ZMM7 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | scal (12.5%) |
MOV 0x10(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VPBROADCASTQ %R12,%ZMM8 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | scal (12.5%) |
ADD %R12,%R11 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VPSUBQ %ZMM8,%ZMM7,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (100.0%) |
VPXOR %XMM8,%XMM8,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | vect (25.0%) |
VPCMPNLEUQ %ZMM4,%ZMM7,%K1 | vect (100.0%) | |||||||||||||||||
KMOVQ %K1,%K2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 1 | 1 | N/A |
VMOVDQU64 (%RAX,%R11,8),%ZMM7{%K1}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
MOV -0x30(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VMOVUPD (%RAX,%R11,8),%ZMM9{%K1}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMOVDQA64 %ZMM7,%ZMM2{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VPSLLQ $0x3,%ZMM2,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 1 | vect (100.0%) |
VPADDQ %ZMM7,%ZMM3,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (100.0%) |
VGATHERQPD (,%ZMM7,1),%ZMM8{%K2} | 48 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.75 | 4.58 | 5.08 | 3.58 | 5 | 5 | 0-16 | 8.94 | vect (100.0%) |
VMOVAPD %ZMM9,%ZMM0{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VMOVAPD %ZMM8,%ZMM1{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VXORPD %ZMM5,%ZMM1,%ZMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (100.0%) |
VMULPD %ZMM8,%ZMM9,%ZMM7{%K1}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VEXTRACTF64X4 $0x1,%ZMM7,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (50.0%) |
VADDPD %ZMM8,%ZMM7,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VEXTRACTF128 $0x1,%YMM7,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | vect (25.0%) |
VADDPD %XMM7,%XMM8,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VPERMILPD $0x1,%XMM7,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | vect (25.0%) |
VADDSD %XMM7,%XMM8,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
VADDSD %XMM7,%XMM6,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
LEA 0x2(%R8,%R15,1),%RAX | 2 | 0 | 0 | 0.50 | 0.50 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | N/A |
VMOVSD %XMM6,(%RDX,%RAX,8) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 | scal (12.5%) |
VMOVSD 0x18(%RSI,%R10,8),%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
MOV 0x18(%RBX,%R10,8),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x20(%RBX,%R10,8),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
SUB %R11,%R13 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JLE 573610 <hypre_CSRMatrixMatvecOutOfPlace.extracted+0x4d50> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV %R13,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
AND $-0x8,%R12 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
JE 57356e <hypre_CSRMatrixMatvecOutOfPlace.extracted+0x4cae> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV -0x30(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
LEA -0x1(%R12),%R14 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VXORPD %XMM7,%XMM7,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
LEA (%RAX,%R11,8),%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV 0x10(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
LEA (%RAX,%R11,8),%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
VEXTRACTF64X4 $0x1,%ZMM7,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (50.0%) |
MOV 0x30(%RBP),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x28(%RBP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VADDPD %ZMM8,%ZMM7,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VEXTRACTF128 $0x1,%YMM7,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | vect (25.0%) |
VADDPD %XMM7,%XMM8,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VPERMILPD $0x1,%XMM7,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | vect (25.0%) |
VADDSD %XMM7,%XMM8,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
VADDSD %XMM7,%XMM6,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
CMP %R12,%R13 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JNE 573579 <hypre_CSRMatrixMatvecOutOfPlace.extracted+0x4cb9> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
JMP 573610 <hypre_CSRMatrixMatvecOutOfPlace.extracted+0x4d50> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
VPBROADCASTQ %R13,%ZMM7 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | scal (12.5%) |
XOR %R12D,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
JMP 57357f <hypre_CSRMatrixMatvecOutOfPlace.extracted+0x4cbf> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
VPBROADCASTQ %R13,%ZMM7 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | scal (12.5%) |
MOV 0x10(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VPBROADCASTQ %R12,%ZMM8 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | scal (12.5%) |
ADD %R12,%R11 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VPSUBQ %ZMM8,%ZMM7,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (100.0%) |
VPXOR %XMM8,%XMM8,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | vect (25.0%) |
VPCMPNLEUQ %ZMM4,%ZMM7,%K1 | vect (100.0%) | |||||||||||||||||
KMOVQ %K1,%K2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 1 | 1 | N/A |
VMOVDQU64 (%RAX,%R11,8),%ZMM7{%K1}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
MOV -0x30(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VMOVUPD (%RAX,%R11,8),%ZMM9{%K1}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMOVDQA64 %ZMM7,%ZMM2{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VPSLLQ $0x3,%ZMM2,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 1 | vect (100.0%) |
VPADDQ %ZMM7,%ZMM3,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (100.0%) |
VGATHERQPD (,%ZMM7,1),%ZMM8{%K2} | 48 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.75 | 4.58 | 5.08 | 3.58 | 5 | 5 | 0-16 | 8.94 | vect (100.0%) |
VMOVAPD %ZMM9,%ZMM0{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VMOVAPD %ZMM8,%ZMM1{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VXORPD %ZMM5,%ZMM1,%ZMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (100.0%) |
VMULPD %ZMM8,%ZMM9,%ZMM7{%K1}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VEXTRACTF64X4 $0x1,%ZMM7,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (50.0%) |
VADDPD %ZMM8,%ZMM7,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VEXTRACTF128 $0x1,%YMM7,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | vect (25.0%) |
VADDPD %XMM7,%XMM8,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VPERMILPD $0x1,%XMM7,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | vect (25.0%) |
VADDSD %XMM7,%XMM8,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
VADDSD %XMM7,%XMM6,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
LEA 0x3(%R8,%R15,1),%RAX | 2 | 0 | 0 | 0.50 | 0.50 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | N/A |
VMOVSD %XMM6,(%RDX,%RAX,8) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 | scal (12.5%) |
VMOVSD 0x20(%RSI,%R10,8),%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
MOV 0x20(%RBX,%R10,8),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x28(%RBX,%R10,8),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
SUB %R11,%R13 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JLE 573760 <hypre_CSRMatrixMatvecOutOfPlace.extracted+0x4ea0> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV %R13,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
AND $-0x8,%R12 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
JE 5736be <hypre_CSRMatrixMatvecOutOfPlace.extracted+0x4dfe> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV -0x30(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
LEA -0x1(%R12),%R14 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VXORPD %XMM7,%XMM7,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
LEA (%RAX,%R11,8),%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV 0x10(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
LEA (%RAX,%R11,8),%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
VEXTRACTF64X4 $0x1,%ZMM7,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (50.0%) |
MOV 0x30(%RBP),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x28(%RBP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VADDPD %ZMM8,%ZMM7,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VEXTRACTF128 $0x1,%YMM7,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | vect (25.0%) |
VADDPD %XMM7,%XMM8,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VPERMILPD $0x1,%XMM7,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | vect (25.0%) |
VADDSD %XMM7,%XMM8,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
VADDSD %XMM7,%XMM6,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
CMP %R12,%R13 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JNE 5736c9 <hypre_CSRMatrixMatvecOutOfPlace.extracted+0x4e09> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
JMP 573760 <hypre_CSRMatrixMatvecOutOfPlace.extracted+0x4ea0> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
VPBROADCASTQ %R13,%ZMM7 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | scal (12.5%) |
XOR %R12D,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
JMP 5736cf <hypre_CSRMatrixMatvecOutOfPlace.extracted+0x4e0f> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
VPBROADCASTQ %R13,%ZMM7 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | scal (12.5%) |
MOV 0x10(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VPBROADCASTQ %R12,%ZMM8 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | scal (12.5%) |
ADD %R12,%R11 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VPSUBQ %ZMM8,%ZMM7,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (100.0%) |
VPXOR %XMM8,%XMM8,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | vect (25.0%) |
VPCMPNLEUQ %ZMM4,%ZMM7,%K1 | vect (100.0%) | |||||||||||||||||
KMOVQ %K1,%K2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 1 | 1 | N/A |
VMOVDQU64 (%RAX,%R11,8),%ZMM7{%K1}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
MOV -0x30(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VMOVUPD (%RAX,%R11,8),%ZMM9{%K1}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMOVDQA64 %ZMM7,%ZMM2{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VPSLLQ $0x3,%ZMM2,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 1 | vect (100.0%) |
VPADDQ %ZMM7,%ZMM3,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (100.0%) |
VGATHERQPD (,%ZMM7,1),%ZMM8{%K2} | 48 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.75 | 4.58 | 5.08 | 3.58 | 5 | 5 | 0-16 | 8.94 | vect (100.0%) |
VMOVAPD %ZMM9,%ZMM0{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VMOVAPD %ZMM8,%ZMM1{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VXORPD %ZMM5,%ZMM1,%ZMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (100.0%) |
VMULPD %ZMM8,%ZMM9,%ZMM7{%K1}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VEXTRACTF64X4 $0x1,%ZMM7,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (50.0%) |
VADDPD %ZMM8,%ZMM7,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VEXTRACTF128 $0x1,%YMM7,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | vect (25.0%) |
VADDPD %XMM7,%XMM8,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VPERMILPD $0x1,%XMM7,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | vect (25.0%) |
VADDSD %XMM7,%XMM8,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
VADDSD %XMM7,%XMM6,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
LEA 0x4(%R8,%R15,1),%RAX | 2 | 0 | 0 | 0.50 | 0.50 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | N/A |
VMOVSD %XMM6,(%RDX,%RAX,8) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 | scal (12.5%) |
VMOVSD 0x28(%RSI,%R10,8),%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
MOV 0x28(%RBX,%R10,8),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x30(%RBX,%R10,8),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
SUB %R11,%R13 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JLE 5738b0 <hypre_CSRMatrixMatvecOutOfPlace.extracted+0x4ff0> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV %R13,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
AND $-0x8,%R12 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
JE 57380e <hypre_CSRMatrixMatvecOutOfPlace.extracted+0x4f4e> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV -0x30(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
LEA -0x1(%R12),%R14 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VXORPD %XMM7,%XMM7,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
LEA (%RAX,%R11,8),%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV 0x10(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
LEA (%RAX,%R11,8),%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
VEXTRACTF64X4 $0x1,%ZMM7,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (50.0%) |
MOV 0x30(%RBP),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x28(%RBP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VADDPD %ZMM8,%ZMM7,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VEXTRACTF128 $0x1,%YMM7,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | vect (25.0%) |
VADDPD %XMM7,%XMM8,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VPERMILPD $0x1,%XMM7,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | vect (25.0%) |
VADDSD %XMM7,%XMM8,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
VADDSD %XMM7,%XMM6,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
CMP %R12,%R13 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JNE 573819 <hypre_CSRMatrixMatvecOutOfPlace.extracted+0x4f59> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
JMP 5738b0 <hypre_CSRMatrixMatvecOutOfPlace.extracted+0x4ff0> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
VPBROADCASTQ %R13,%ZMM7 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | scal (12.5%) |
XOR %R12D,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
JMP 57381f <hypre_CSRMatrixMatvecOutOfPlace.extracted+0x4f5f> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
VPBROADCASTQ %R13,%ZMM7 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | scal (12.5%) |
MOV 0x10(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VPBROADCASTQ %R12,%ZMM8 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | scal (12.5%) |
ADD %R12,%R11 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VPSUBQ %ZMM8,%ZMM7,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (100.0%) |
VPXOR %XMM8,%XMM8,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | vect (25.0%) |
VPCMPNLEUQ %ZMM4,%ZMM7,%K1 | vect (100.0%) | |||||||||||||||||
KMOVQ %K1,%K2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 1 | 1 | N/A |
VMOVDQU64 (%RAX,%R11,8),%ZMM7{%K1}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
MOV -0x30(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VMOVUPD (%RAX,%R11,8),%ZMM9{%K1}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMOVDQA64 %ZMM7,%ZMM2{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VPSLLQ $0x3,%ZMM2,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 1 | vect (100.0%) |
VPADDQ %ZMM7,%ZMM3,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (100.0%) |
VGATHERQPD (,%ZMM7,1),%ZMM8{%K2} | 48 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.75 | 4.58 | 5.08 | 3.58 | 5 | 5 | 0-16 | 8.94 | vect (100.0%) |
VMOVAPD %ZMM9,%ZMM0{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VMOVAPD %ZMM8,%ZMM1{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VXORPD %ZMM5,%ZMM1,%ZMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (100.0%) |
VMULPD %ZMM8,%ZMM9,%ZMM7{%K1}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VEXTRACTF64X4 $0x1,%ZMM7,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (50.0%) |
VADDPD %ZMM8,%ZMM7,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VEXTRACTF128 $0x1,%YMM7,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | vect (25.0%) |
VADDPD %XMM7,%XMM8,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VPERMILPD $0x1,%XMM7,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | vect (25.0%) |
VADDSD %XMM7,%XMM8,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
VADDSD %XMM7,%XMM6,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
LEA 0x5(%R8,%R15,1),%RAX | 2 | 0 | 0 | 0.50 | 0.50 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | N/A |
VMOVSD %XMM6,(%RDX,%RAX,8) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 | scal (12.5%) |
VMOVSD 0x30(%RSI,%R10,8),%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
MOV 0x30(%RBX,%R10,8),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x38(%RBX,%R10,8),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
SUB %R11,%R13 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JLE 573a00 <hypre_CSRMatrixMatvecOutOfPlace.extracted+0x5140> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV %R13,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
AND $-0x8,%R12 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
JE 57395e <hypre_CSRMatrixMatvecOutOfPlace.extracted+0x509e> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV -0x30(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
LEA -0x1(%R12),%R14 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VXORPD %XMM7,%XMM7,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
LEA (%RAX,%R11,8),%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV 0x10(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
LEA (%RAX,%R11,8),%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
VEXTRACTF64X4 $0x1,%ZMM7,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (50.0%) |
MOV 0x30(%RBP),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x28(%RBP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VADDPD %ZMM8,%ZMM7,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VEXTRACTF128 $0x1,%YMM7,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | vect (25.0%) |
VADDPD %XMM7,%XMM8,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VPERMILPD $0x1,%XMM7,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | vect (25.0%) |
VADDSD %XMM7,%XMM8,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
VADDSD %XMM7,%XMM6,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
CMP %R12,%R13 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JNE 573969 <hypre_CSRMatrixMatvecOutOfPlace.extracted+0x50a9> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
JMP 573a00 <hypre_CSRMatrixMatvecOutOfPlace.extracted+0x5140> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
VPBROADCASTQ %R13,%ZMM7 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | scal (12.5%) |
XOR %R12D,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
JMP 57396f <hypre_CSRMatrixMatvecOutOfPlace.extracted+0x50af> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
VPBROADCASTQ %R13,%ZMM7 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | scal (12.5%) |
MOV 0x10(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VPBROADCASTQ %R12,%ZMM8 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | scal (12.5%) |
ADD %R12,%R11 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VPSUBQ %ZMM8,%ZMM7,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (100.0%) |
VPXOR %XMM8,%XMM8,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | vect (25.0%) |
VPCMPNLEUQ %ZMM4,%ZMM7,%K1 | vect (100.0%) | |||||||||||||||||
KMOVQ %K1,%K2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 1 | 1 | N/A |
VMOVDQU64 (%RAX,%R11,8),%ZMM7{%K1}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
MOV -0x30(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VMOVUPD (%RAX,%R11,8),%ZMM9{%K1}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMOVDQA64 %ZMM7,%ZMM2{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VPSLLQ $0x3,%ZMM2,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 1 | vect (100.0%) |
VPADDQ %ZMM7,%ZMM3,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (100.0%) |
VGATHERQPD (,%ZMM7,1),%ZMM8{%K2} | 48 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.75 | 4.58 | 5.08 | 3.58 | 5 | 5 | 0-16 | 8.94 | vect (100.0%) |
VMOVAPD %ZMM9,%ZMM0{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VMOVAPD %ZMM8,%ZMM1{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VXORPD %ZMM5,%ZMM1,%ZMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (100.0%) |
VMULPD %ZMM8,%ZMM9,%ZMM7{%K1}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VEXTRACTF64X4 $0x1,%ZMM7,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (50.0%) |
VADDPD %ZMM8,%ZMM7,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VEXTRACTF128 $0x1,%YMM7,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | vect (25.0%) |
VADDPD %XMM7,%XMM8,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VPERMILPD $0x1,%XMM7,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | vect (25.0%) |
VADDSD %XMM7,%XMM8,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
VADDSD %XMM7,%XMM6,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
LEA 0x6(%R8,%R15,1),%RAX | 2 | 0 | 0 | 0.50 | 0.50 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | N/A |
VMOVSD %XMM6,(%RDX,%RAX,8) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 | scal (12.5%) |
VMOVSD 0x38(%RSI,%R10,8),%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
MOV 0x38(%RBX,%R10,8),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x40(%RBX,%R10,8),%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
SUB %R11,%R12 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
JLE 5730bd <hypre_CSRMatrixMatvecOutOfPlace.extracted+0x47fd> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV %R12,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
AND $-0x8,%R10 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
JE 573023 <hypre_CSRMatrixMatvecOutOfPlace.extracted+0x4763> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV -0x30(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
LEA -0x1(%R10),%R14 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VXORPD %XMM7,%XMM7,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
LEA (%RAX,%R11,8),%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV 0x10(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
LEA (%RAX,%R11,8),%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
NOPL (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
VEXTRACTF64X4 $0x1,%ZMM7,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (50.0%) |
MOV 0x30(%RBP),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VADDPD %ZMM8,%ZMM7,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VEXTRACTF128 $0x1,%YMM7,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | vect (25.0%) |
VADDPD %XMM7,%XMM8,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VPERMILPD $0x1,%XMM7,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | vect (25.0%) |
VADDSD %XMM7,%XMM8,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
VADDSD %XMM7,%XMM6,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
CMP %R10,%R12 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
JNE 573aae <hypre_CSRMatrixMatvecOutOfPlace.extracted+0x51ee> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV 0x28(%RBP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
JMP 5730bd <hypre_CSRMatrixMatvecOutOfPlace.extracted+0x47fd> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
MOV 0x28(%RBP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VPBROADCASTQ %R12,%ZMM7 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | scal (12.5%) |
JMP 57302c <hypre_CSRMatrixMatvecOutOfPlace.extracted+0x476c> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
Function | hypre_CSRMatrixMatvecOutOfPlace.extracted |
Source file and lines | csr_matvec.c:248-314 |
Module | exec |
nb instructions | 477 |
nb uops | 868 |
loop length | 2402 |
used x86 registers | 13 |
used mmx registers | 0 |
used xmm registers | 3 |
used ymm registers | 2 |
used zmm registers | 9 |
nb stack references | 4 |
ADD-SUB / MUL ratio | 8.00 |
micro-operation queue | 144.67 cycles |
front end | 144.67 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 21.75 | 21.75 | 21.50 | 21.50 | 20.50 | 35.00 | 35.00 | 35.00 | 82.00 | 82.00 | 81.92 | 82.08 | 44.00 | 44.00 |
cycles | 21.75 | 21.75 | 21.50 | 21.50 | 20.50 | 40.33 | 40.33 | 40.33 | 90.00 | 82.00 | 87.92 | 92.08 | 44.00 | 44.00 |
Cycles executing div or sqrt instructions | NA |
Front-end | 144.67 |
Dispatch | 92.08 |
Overall L1 | 144.67 |
all | 54% |
load | 53% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 69% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 50% |
all | 73% |
load | 66% |
store | 0% |
mul | 100% |
add-sub | 50% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 100% |
all | 67% |
load | 61% |
store | 0% |
mul | 100% |
add-sub | 55% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 78% |
all | 54% |
load | 59% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 73% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 46% |
all | 50% |
load | 70% |
store | 12% |
mul | 100% |
add-sub | 37% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 56% |
all | 51% |
load | 66% |
store | 12% |
mul | 100% |
add-sub | 46% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 52% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput | Vectorization |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
VPBROADCASTQ %R12,%ZMM7 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | scal (12.5%) |
XOR %R10D,%R10D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
MOV 0x10(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VPBROADCASTQ %R10,%ZMM8 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | scal (12.5%) |
ADD %R10,%R11 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VPSUBQ %ZMM8,%ZMM7,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (100.0%) |
VPXOR %XMM8,%XMM8,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | vect (25.0%) |
VPCMPNLEUQ %ZMM4,%ZMM7,%K1 | vect (100.0%) | |||||||||||||||||
KMOVQ %K1,%K2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 1 | 1 | N/A |
VMOVDQU64 (%RAX,%R11,8),%ZMM7{%K1}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
MOV -0x30(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VMOVUPD (%RAX,%R11,8),%ZMM9{%K1}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMOVDQA64 %ZMM7,%ZMM2{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VPSLLQ $0x3,%ZMM2,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 1 | vect (100.0%) |
VPADDQ %ZMM7,%ZMM3,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (100.0%) |
VGATHERQPD (,%ZMM7,1),%ZMM8{%K2} | 48 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.75 | 4.58 | 5.08 | 3.58 | 5 | 5 | 0-16 | 8.94 | vect (100.0%) |
VMOVAPD %ZMM9,%ZMM0{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VMOVAPD %ZMM8,%ZMM1{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VXORPD %ZMM5,%ZMM1,%ZMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (100.0%) |
VMULPD %ZMM8,%ZMM9,%ZMM7{%K1}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VEXTRACTF64X4 $0x1,%ZMM7,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (50.0%) |
VADDPD %ZMM8,%ZMM7,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VEXTRACTF128 $0x1,%YMM7,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | vect (25.0%) |
VADDPD %XMM7,%XMM8,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VPERMILPD $0x1,%XMM7,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | vect (25.0%) |
VADDSD %XMM7,%XMM8,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
VADDSD %XMM7,%XMM6,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
LEA 0x7(%R8,%R15,1),%RAX | 2 | 0 | 0 | 0.50 | 0.50 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | N/A |
ADD $0x8,%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VMOVSD %XMM6,(%RDX,%RAX,8) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 | scal (12.5%) |
CMP %R9,%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JE 56fb33 <hypre_CSRMatrixMatvecOutOfPlace.extracted+0x1273> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
LEA (%R15,%R8,1),%R10 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VMOVSD (%RSI,%R10,8),%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
MOV (%RBX,%R10,8),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x8(%RBX,%R10,8),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
SUB %R11,%R13 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JLE 573224 <hypre_CSRMatrixMatvecOutOfPlace.extracted+0x4964> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV %R13,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
AND $-0x8,%R12 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
JE 57317e <hypre_CSRMatrixMatvecOutOfPlace.extracted+0x48be> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV -0x30(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
LEA -0x1(%R12),%R14 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VXORPD %XMM7,%XMM7,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
LEA (%RAX,%R11,8),%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV 0x10(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
LEA (%RAX,%R11,8),%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
NOPL (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
VEXTRACTF64X4 $0x1,%ZMM7,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (50.0%) |
MOV 0x28(%RBP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VADDPD %ZMM8,%ZMM7,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VEXTRACTF128 $0x1,%YMM7,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | vect (25.0%) |
VADDPD %XMM7,%XMM8,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VPERMILPD $0x1,%XMM7,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | vect (25.0%) |
VADDSD %XMM7,%XMM8,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
VADDSD %XMM7,%XMM6,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
CMP %R12,%R13 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JNE 573189 <hypre_CSRMatrixMatvecOutOfPlace.extracted+0x48c9> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV 0x30(%RBP),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
JMP 573224 <hypre_CSRMatrixMatvecOutOfPlace.extracted+0x4964> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
VPBROADCASTQ %R13,%ZMM7 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | scal (12.5%) |
XOR %R12D,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
JMP 573193 <hypre_CSRMatrixMatvecOutOfPlace.extracted+0x48d3> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
MOV 0x30(%RBP),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VPBROADCASTQ %R13,%ZMM7 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | scal (12.5%) |
MOV 0x10(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VPBROADCASTQ %R12,%ZMM8 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | scal (12.5%) |
ADD %R12,%R11 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VPSUBQ %ZMM8,%ZMM7,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (100.0%) |
VPXOR %XMM8,%XMM8,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | vect (25.0%) |
VPCMPNLEUQ %ZMM4,%ZMM7,%K1 | vect (100.0%) | |||||||||||||||||
KMOVQ %K1,%K2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 1 | 1 | N/A |
VMOVDQU64 (%RAX,%R11,8),%ZMM7{%K1}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
MOV -0x30(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VMOVUPD (%RAX,%R11,8),%ZMM9{%K1}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMOVDQA64 %ZMM7,%ZMM2{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VPSLLQ $0x3,%ZMM2,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 1 | vect (100.0%) |
VPADDQ %ZMM7,%ZMM3,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (100.0%) |
VGATHERQPD (,%ZMM7,1),%ZMM8{%K2} | 48 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.75 | 4.58 | 5.08 | 3.58 | 5 | 5 | 0-16 | 8.94 | vect (100.0%) |
VMOVAPD %ZMM9,%ZMM0{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VMOVAPD %ZMM8,%ZMM1{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VXORPD %ZMM5,%ZMM1,%ZMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (100.0%) |
VMULPD %ZMM8,%ZMM9,%ZMM7{%K1}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VEXTRACTF64X4 $0x1,%ZMM7,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (50.0%) |
VADDPD %ZMM8,%ZMM7,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VEXTRACTF128 $0x1,%YMM7,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | vect (25.0%) |
VADDPD %XMM7,%XMM8,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VPERMILPD $0x1,%XMM7,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | vect (25.0%) |
VADDSD %XMM7,%XMM8,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
VADDSD %XMM7,%XMM6,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
VMOVSD %XMM6,(%RDX,%R10,8) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 | scal (12.5%) |
VMOVSD 0x8(%RSI,%R10,8),%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
MOV 0x8(%RBX,%R10,8),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x10(%RBX,%R10,8),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
SUB %R11,%R13 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JLE 573370 <hypre_CSRMatrixMatvecOutOfPlace.extracted+0x4ab0> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV %R13,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
AND $-0x8,%R12 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
JE 5732ce <hypre_CSRMatrixMatvecOutOfPlace.extracted+0x4a0e> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV -0x30(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
LEA -0x1(%R12),%R14 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VXORPD %XMM7,%XMM7,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
LEA (%RAX,%R11,8),%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV 0x10(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
LEA (%RAX,%R11,8),%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
VEXTRACTF64X4 $0x1,%ZMM7,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (50.0%) |
MOV 0x30(%RBP),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x28(%RBP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VADDPD %ZMM8,%ZMM7,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VEXTRACTF128 $0x1,%YMM7,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | vect (25.0%) |
VADDPD %XMM7,%XMM8,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VPERMILPD $0x1,%XMM7,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | vect (25.0%) |
VADDSD %XMM7,%XMM8,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
VADDSD %XMM7,%XMM6,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
CMP %R12,%R13 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JNE 5732d9 <hypre_CSRMatrixMatvecOutOfPlace.extracted+0x4a19> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
JMP 573370 <hypre_CSRMatrixMatvecOutOfPlace.extracted+0x4ab0> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
VPBROADCASTQ %R13,%ZMM7 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | scal (12.5%) |
XOR %R12D,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
JMP 5732df <hypre_CSRMatrixMatvecOutOfPlace.extracted+0x4a1f> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
VPBROADCASTQ %R13,%ZMM7 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | scal (12.5%) |
MOV 0x10(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VPBROADCASTQ %R12,%ZMM8 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | scal (12.5%) |
ADD %R12,%R11 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VPSUBQ %ZMM8,%ZMM7,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (100.0%) |
VPXOR %XMM8,%XMM8,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | vect (25.0%) |
VPCMPNLEUQ %ZMM4,%ZMM7,%K1 | vect (100.0%) | |||||||||||||||||
KMOVQ %K1,%K2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 1 | 1 | N/A |
VMOVDQU64 (%RAX,%R11,8),%ZMM7{%K1}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
MOV -0x30(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VMOVUPD (%RAX,%R11,8),%ZMM9{%K1}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMOVDQA64 %ZMM7,%ZMM2{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VPSLLQ $0x3,%ZMM2,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 1 | vect (100.0%) |
VPADDQ %ZMM7,%ZMM3,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (100.0%) |
VGATHERQPD (,%ZMM7,1),%ZMM8{%K2} | 48 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.75 | 4.58 | 5.08 | 3.58 | 5 | 5 | 0-16 | 8.94 | vect (100.0%) |
VMOVAPD %ZMM9,%ZMM0{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VMOVAPD %ZMM8,%ZMM1{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VXORPD %ZMM5,%ZMM1,%ZMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (100.0%) |
VMULPD %ZMM8,%ZMM9,%ZMM7{%K1}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VEXTRACTF64X4 $0x1,%ZMM7,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (50.0%) |
VADDPD %ZMM8,%ZMM7,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VEXTRACTF128 $0x1,%YMM7,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | vect (25.0%) |
VADDPD %XMM7,%XMM8,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VPERMILPD $0x1,%XMM7,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | vect (25.0%) |
VADDSD %XMM7,%XMM8,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
VADDSD %XMM7,%XMM6,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
LEA 0x1(%R8,%R15,1),%RAX | 2 | 0 | 0 | 0.50 | 0.50 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | N/A |
VMOVSD %XMM6,(%RDX,%RAX,8) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 | scal (12.5%) |
VMOVSD 0x10(%RSI,%R10,8),%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
MOV 0x10(%RBX,%R10,8),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x18(%RBX,%R10,8),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
SUB %R11,%R13 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JLE 5734c0 <hypre_CSRMatrixMatvecOutOfPlace.extracted+0x4c00> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV %R13,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
AND $-0x8,%R12 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
JE 57341e <hypre_CSRMatrixMatvecOutOfPlace.extracted+0x4b5e> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV -0x30(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
LEA -0x1(%R12),%R14 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VXORPD %XMM7,%XMM7,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
LEA (%RAX,%R11,8),%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV 0x10(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
LEA (%RAX,%R11,8),%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
VEXTRACTF64X4 $0x1,%ZMM7,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (50.0%) |
MOV 0x30(%RBP),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x28(%RBP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VADDPD %ZMM8,%ZMM7,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VEXTRACTF128 $0x1,%YMM7,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | vect (25.0%) |
VADDPD %XMM7,%XMM8,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VPERMILPD $0x1,%XMM7,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | vect (25.0%) |
VADDSD %XMM7,%XMM8,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
VADDSD %XMM7,%XMM6,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
CMP %R12,%R13 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JNE 573429 <hypre_CSRMatrixMatvecOutOfPlace.extracted+0x4b69> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
JMP 5734c0 <hypre_CSRMatrixMatvecOutOfPlace.extracted+0x4c00> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
VPBROADCASTQ %R13,%ZMM7 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | scal (12.5%) |
XOR %R12D,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
JMP 57342f <hypre_CSRMatrixMatvecOutOfPlace.extracted+0x4b6f> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
VPBROADCASTQ %R13,%ZMM7 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | scal (12.5%) |
MOV 0x10(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VPBROADCASTQ %R12,%ZMM8 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | scal (12.5%) |
ADD %R12,%R11 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VPSUBQ %ZMM8,%ZMM7,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (100.0%) |
VPXOR %XMM8,%XMM8,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | vect (25.0%) |
VPCMPNLEUQ %ZMM4,%ZMM7,%K1 | vect (100.0%) | |||||||||||||||||
KMOVQ %K1,%K2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 1 | 1 | N/A |
VMOVDQU64 (%RAX,%R11,8),%ZMM7{%K1}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
MOV -0x30(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VMOVUPD (%RAX,%R11,8),%ZMM9{%K1}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMOVDQA64 %ZMM7,%ZMM2{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VPSLLQ $0x3,%ZMM2,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 1 | vect (100.0%) |
VPADDQ %ZMM7,%ZMM3,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (100.0%) |
VGATHERQPD (,%ZMM7,1),%ZMM8{%K2} | 48 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.75 | 4.58 | 5.08 | 3.58 | 5 | 5 | 0-16 | 8.94 | vect (100.0%) |
VMOVAPD %ZMM9,%ZMM0{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VMOVAPD %ZMM8,%ZMM1{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VXORPD %ZMM5,%ZMM1,%ZMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (100.0%) |
VMULPD %ZMM8,%ZMM9,%ZMM7{%K1}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VEXTRACTF64X4 $0x1,%ZMM7,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (50.0%) |
VADDPD %ZMM8,%ZMM7,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VEXTRACTF128 $0x1,%YMM7,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | vect (25.0%) |
VADDPD %XMM7,%XMM8,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VPERMILPD $0x1,%XMM7,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | vect (25.0%) |
VADDSD %XMM7,%XMM8,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
VADDSD %XMM7,%XMM6,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
LEA 0x2(%R8,%R15,1),%RAX | 2 | 0 | 0 | 0.50 | 0.50 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | N/A |
VMOVSD %XMM6,(%RDX,%RAX,8) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 | scal (12.5%) |
VMOVSD 0x18(%RSI,%R10,8),%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
MOV 0x18(%RBX,%R10,8),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x20(%RBX,%R10,8),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
SUB %R11,%R13 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JLE 573610 <hypre_CSRMatrixMatvecOutOfPlace.extracted+0x4d50> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV %R13,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
AND $-0x8,%R12 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
JE 57356e <hypre_CSRMatrixMatvecOutOfPlace.extracted+0x4cae> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV -0x30(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
LEA -0x1(%R12),%R14 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VXORPD %XMM7,%XMM7,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
LEA (%RAX,%R11,8),%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV 0x10(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
LEA (%RAX,%R11,8),%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
VEXTRACTF64X4 $0x1,%ZMM7,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (50.0%) |
MOV 0x30(%RBP),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x28(%RBP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VADDPD %ZMM8,%ZMM7,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VEXTRACTF128 $0x1,%YMM7,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | vect (25.0%) |
VADDPD %XMM7,%XMM8,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VPERMILPD $0x1,%XMM7,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | vect (25.0%) |
VADDSD %XMM7,%XMM8,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
VADDSD %XMM7,%XMM6,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
CMP %R12,%R13 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JNE 573579 <hypre_CSRMatrixMatvecOutOfPlace.extracted+0x4cb9> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
JMP 573610 <hypre_CSRMatrixMatvecOutOfPlace.extracted+0x4d50> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
VPBROADCASTQ %R13,%ZMM7 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | scal (12.5%) |
XOR %R12D,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
JMP 57357f <hypre_CSRMatrixMatvecOutOfPlace.extracted+0x4cbf> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
VPBROADCASTQ %R13,%ZMM7 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | scal (12.5%) |
MOV 0x10(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VPBROADCASTQ %R12,%ZMM8 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | scal (12.5%) |
ADD %R12,%R11 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VPSUBQ %ZMM8,%ZMM7,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (100.0%) |
VPXOR %XMM8,%XMM8,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | vect (25.0%) |
VPCMPNLEUQ %ZMM4,%ZMM7,%K1 | vect (100.0%) | |||||||||||||||||
KMOVQ %K1,%K2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 1 | 1 | N/A |
VMOVDQU64 (%RAX,%R11,8),%ZMM7{%K1}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
MOV -0x30(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VMOVUPD (%RAX,%R11,8),%ZMM9{%K1}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMOVDQA64 %ZMM7,%ZMM2{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VPSLLQ $0x3,%ZMM2,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 1 | vect (100.0%) |
VPADDQ %ZMM7,%ZMM3,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (100.0%) |
VGATHERQPD (,%ZMM7,1),%ZMM8{%K2} | 48 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.75 | 4.58 | 5.08 | 3.58 | 5 | 5 | 0-16 | 8.94 | vect (100.0%) |
VMOVAPD %ZMM9,%ZMM0{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VMOVAPD %ZMM8,%ZMM1{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VXORPD %ZMM5,%ZMM1,%ZMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (100.0%) |
VMULPD %ZMM8,%ZMM9,%ZMM7{%K1}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VEXTRACTF64X4 $0x1,%ZMM7,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (50.0%) |
VADDPD %ZMM8,%ZMM7,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VEXTRACTF128 $0x1,%YMM7,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | vect (25.0%) |
VADDPD %XMM7,%XMM8,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VPERMILPD $0x1,%XMM7,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | vect (25.0%) |
VADDSD %XMM7,%XMM8,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
VADDSD %XMM7,%XMM6,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
LEA 0x3(%R8,%R15,1),%RAX | 2 | 0 | 0 | 0.50 | 0.50 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | N/A |
VMOVSD %XMM6,(%RDX,%RAX,8) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 | scal (12.5%) |
VMOVSD 0x20(%RSI,%R10,8),%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
MOV 0x20(%RBX,%R10,8),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x28(%RBX,%R10,8),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
SUB %R11,%R13 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JLE 573760 <hypre_CSRMatrixMatvecOutOfPlace.extracted+0x4ea0> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV %R13,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
AND $-0x8,%R12 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
JE 5736be <hypre_CSRMatrixMatvecOutOfPlace.extracted+0x4dfe> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV -0x30(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
LEA -0x1(%R12),%R14 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VXORPD %XMM7,%XMM7,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
LEA (%RAX,%R11,8),%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV 0x10(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
LEA (%RAX,%R11,8),%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
VEXTRACTF64X4 $0x1,%ZMM7,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (50.0%) |
MOV 0x30(%RBP),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x28(%RBP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VADDPD %ZMM8,%ZMM7,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VEXTRACTF128 $0x1,%YMM7,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | vect (25.0%) |
VADDPD %XMM7,%XMM8,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VPERMILPD $0x1,%XMM7,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | vect (25.0%) |
VADDSD %XMM7,%XMM8,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
VADDSD %XMM7,%XMM6,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
CMP %R12,%R13 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JNE 5736c9 <hypre_CSRMatrixMatvecOutOfPlace.extracted+0x4e09> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
JMP 573760 <hypre_CSRMatrixMatvecOutOfPlace.extracted+0x4ea0> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
VPBROADCASTQ %R13,%ZMM7 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | scal (12.5%) |
XOR %R12D,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
JMP 5736cf <hypre_CSRMatrixMatvecOutOfPlace.extracted+0x4e0f> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
VPBROADCASTQ %R13,%ZMM7 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | scal (12.5%) |
MOV 0x10(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VPBROADCASTQ %R12,%ZMM8 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | scal (12.5%) |
ADD %R12,%R11 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VPSUBQ %ZMM8,%ZMM7,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (100.0%) |
VPXOR %XMM8,%XMM8,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | vect (25.0%) |
VPCMPNLEUQ %ZMM4,%ZMM7,%K1 | vect (100.0%) | |||||||||||||||||
KMOVQ %K1,%K2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 1 | 1 | N/A |
VMOVDQU64 (%RAX,%R11,8),%ZMM7{%K1}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
MOV -0x30(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VMOVUPD (%RAX,%R11,8),%ZMM9{%K1}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMOVDQA64 %ZMM7,%ZMM2{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VPSLLQ $0x3,%ZMM2,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 1 | vect (100.0%) |
VPADDQ %ZMM7,%ZMM3,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (100.0%) |
VGATHERQPD (,%ZMM7,1),%ZMM8{%K2} | 48 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.75 | 4.58 | 5.08 | 3.58 | 5 | 5 | 0-16 | 8.94 | vect (100.0%) |
VMOVAPD %ZMM9,%ZMM0{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VMOVAPD %ZMM8,%ZMM1{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VXORPD %ZMM5,%ZMM1,%ZMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (100.0%) |
VMULPD %ZMM8,%ZMM9,%ZMM7{%K1}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VEXTRACTF64X4 $0x1,%ZMM7,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (50.0%) |
VADDPD %ZMM8,%ZMM7,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VEXTRACTF128 $0x1,%YMM7,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | vect (25.0%) |
VADDPD %XMM7,%XMM8,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VPERMILPD $0x1,%XMM7,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | vect (25.0%) |
VADDSD %XMM7,%XMM8,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
VADDSD %XMM7,%XMM6,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
LEA 0x4(%R8,%R15,1),%RAX | 2 | 0 | 0 | 0.50 | 0.50 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | N/A |
VMOVSD %XMM6,(%RDX,%RAX,8) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 | scal (12.5%) |
VMOVSD 0x28(%RSI,%R10,8),%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
MOV 0x28(%RBX,%R10,8),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x30(%RBX,%R10,8),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
SUB %R11,%R13 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JLE 5738b0 <hypre_CSRMatrixMatvecOutOfPlace.extracted+0x4ff0> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV %R13,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
AND $-0x8,%R12 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
JE 57380e <hypre_CSRMatrixMatvecOutOfPlace.extracted+0x4f4e> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV -0x30(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
LEA -0x1(%R12),%R14 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VXORPD %XMM7,%XMM7,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
LEA (%RAX,%R11,8),%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV 0x10(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
LEA (%RAX,%R11,8),%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
VEXTRACTF64X4 $0x1,%ZMM7,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (50.0%) |
MOV 0x30(%RBP),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x28(%RBP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VADDPD %ZMM8,%ZMM7,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VEXTRACTF128 $0x1,%YMM7,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | vect (25.0%) |
VADDPD %XMM7,%XMM8,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VPERMILPD $0x1,%XMM7,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | vect (25.0%) |
VADDSD %XMM7,%XMM8,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
VADDSD %XMM7,%XMM6,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
CMP %R12,%R13 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JNE 573819 <hypre_CSRMatrixMatvecOutOfPlace.extracted+0x4f59> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
JMP 5738b0 <hypre_CSRMatrixMatvecOutOfPlace.extracted+0x4ff0> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
VPBROADCASTQ %R13,%ZMM7 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | scal (12.5%) |
XOR %R12D,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
JMP 57381f <hypre_CSRMatrixMatvecOutOfPlace.extracted+0x4f5f> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
VPBROADCASTQ %R13,%ZMM7 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | scal (12.5%) |
MOV 0x10(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VPBROADCASTQ %R12,%ZMM8 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | scal (12.5%) |
ADD %R12,%R11 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VPSUBQ %ZMM8,%ZMM7,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (100.0%) |
VPXOR %XMM8,%XMM8,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | vect (25.0%) |
VPCMPNLEUQ %ZMM4,%ZMM7,%K1 | vect (100.0%) | |||||||||||||||||
KMOVQ %K1,%K2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 1 | 1 | N/A |
VMOVDQU64 (%RAX,%R11,8),%ZMM7{%K1}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
MOV -0x30(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VMOVUPD (%RAX,%R11,8),%ZMM9{%K1}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMOVDQA64 %ZMM7,%ZMM2{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VPSLLQ $0x3,%ZMM2,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 1 | vect (100.0%) |
VPADDQ %ZMM7,%ZMM3,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (100.0%) |
VGATHERQPD (,%ZMM7,1),%ZMM8{%K2} | 48 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.75 | 4.58 | 5.08 | 3.58 | 5 | 5 | 0-16 | 8.94 | vect (100.0%) |
VMOVAPD %ZMM9,%ZMM0{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VMOVAPD %ZMM8,%ZMM1{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VXORPD %ZMM5,%ZMM1,%ZMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (100.0%) |
VMULPD %ZMM8,%ZMM9,%ZMM7{%K1}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VEXTRACTF64X4 $0x1,%ZMM7,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (50.0%) |
VADDPD %ZMM8,%ZMM7,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VEXTRACTF128 $0x1,%YMM7,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | vect (25.0%) |
VADDPD %XMM7,%XMM8,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VPERMILPD $0x1,%XMM7,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | vect (25.0%) |
VADDSD %XMM7,%XMM8,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
VADDSD %XMM7,%XMM6,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
LEA 0x5(%R8,%R15,1),%RAX | 2 | 0 | 0 | 0.50 | 0.50 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | N/A |
VMOVSD %XMM6,(%RDX,%RAX,8) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 | scal (12.5%) |
VMOVSD 0x30(%RSI,%R10,8),%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
MOV 0x30(%RBX,%R10,8),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x38(%RBX,%R10,8),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
SUB %R11,%R13 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JLE 573a00 <hypre_CSRMatrixMatvecOutOfPlace.extracted+0x5140> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV %R13,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
AND $-0x8,%R12 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
JE 57395e <hypre_CSRMatrixMatvecOutOfPlace.extracted+0x509e> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV -0x30(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
LEA -0x1(%R12),%R14 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VXORPD %XMM7,%XMM7,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
LEA (%RAX,%R11,8),%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV 0x10(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
LEA (%RAX,%R11,8),%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
VEXTRACTF64X4 $0x1,%ZMM7,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (50.0%) |
MOV 0x30(%RBP),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x28(%RBP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VADDPD %ZMM8,%ZMM7,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VEXTRACTF128 $0x1,%YMM7,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | vect (25.0%) |
VADDPD %XMM7,%XMM8,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VPERMILPD $0x1,%XMM7,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | vect (25.0%) |
VADDSD %XMM7,%XMM8,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
VADDSD %XMM7,%XMM6,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
CMP %R12,%R13 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JNE 573969 <hypre_CSRMatrixMatvecOutOfPlace.extracted+0x50a9> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
JMP 573a00 <hypre_CSRMatrixMatvecOutOfPlace.extracted+0x5140> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
VPBROADCASTQ %R13,%ZMM7 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | scal (12.5%) |
XOR %R12D,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
JMP 57396f <hypre_CSRMatrixMatvecOutOfPlace.extracted+0x50af> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
VPBROADCASTQ %R13,%ZMM7 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | scal (12.5%) |
MOV 0x10(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VPBROADCASTQ %R12,%ZMM8 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | scal (12.5%) |
ADD %R12,%R11 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VPSUBQ %ZMM8,%ZMM7,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (100.0%) |
VPXOR %XMM8,%XMM8,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | vect (25.0%) |
VPCMPNLEUQ %ZMM4,%ZMM7,%K1 | vect (100.0%) | |||||||||||||||||
KMOVQ %K1,%K2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 1 | 1 | N/A |
VMOVDQU64 (%RAX,%R11,8),%ZMM7{%K1}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
MOV -0x30(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VMOVUPD (%RAX,%R11,8),%ZMM9{%K1}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMOVDQA64 %ZMM7,%ZMM2{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VPSLLQ $0x3,%ZMM2,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 1 | vect (100.0%) |
VPADDQ %ZMM7,%ZMM3,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (100.0%) |
VGATHERQPD (,%ZMM7,1),%ZMM8{%K2} | 48 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.75 | 4.58 | 5.08 | 3.58 | 5 | 5 | 0-16 | 8.94 | vect (100.0%) |
VMOVAPD %ZMM9,%ZMM0{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VMOVAPD %ZMM8,%ZMM1{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VXORPD %ZMM5,%ZMM1,%ZMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (100.0%) |
VMULPD %ZMM8,%ZMM9,%ZMM7{%K1}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VEXTRACTF64X4 $0x1,%ZMM7,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (50.0%) |
VADDPD %ZMM8,%ZMM7,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VEXTRACTF128 $0x1,%YMM7,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | vect (25.0%) |
VADDPD %XMM7,%XMM8,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VPERMILPD $0x1,%XMM7,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | vect (25.0%) |
VADDSD %XMM7,%XMM8,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
VADDSD %XMM7,%XMM6,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
LEA 0x6(%R8,%R15,1),%RAX | 2 | 0 | 0 | 0.50 | 0.50 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0.50 | N/A |
VMOVSD %XMM6,(%RDX,%RAX,8) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 | scal (12.5%) |
VMOVSD 0x38(%RSI,%R10,8),%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
MOV 0x38(%RBX,%R10,8),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x40(%RBX,%R10,8),%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
SUB %R11,%R12 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
JLE 5730bd <hypre_CSRMatrixMatvecOutOfPlace.extracted+0x47fd> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV %R12,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
AND $-0x8,%R10 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
JE 573023 <hypre_CSRMatrixMatvecOutOfPlace.extracted+0x4763> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV -0x30(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
LEA -0x1(%R10),%R14 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VXORPD %XMM7,%XMM7,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
LEA (%RAX,%R11,8),%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV 0x10(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
LEA (%RAX,%R11,8),%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
NOPL (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
VEXTRACTF64X4 $0x1,%ZMM7,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (50.0%) |
MOV 0x30(%RBP),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VADDPD %ZMM8,%ZMM7,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VEXTRACTF128 $0x1,%YMM7,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | vect (25.0%) |
VADDPD %XMM7,%XMM8,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VPERMILPD $0x1,%XMM7,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | vect (25.0%) |
VADDSD %XMM7,%XMM8,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
VADDSD %XMM7,%XMM6,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
CMP %R10,%R12 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
JNE 573aae <hypre_CSRMatrixMatvecOutOfPlace.extracted+0x51ee> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV 0x28(%RBP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
JMP 5730bd <hypre_CSRMatrixMatvecOutOfPlace.extracted+0x47fd> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
MOV 0x28(%RBP),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VPBROADCASTQ %R12,%ZMM7 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | scal (12.5%) |
JMP 57302c <hypre_CSRMatrixMatvecOutOfPlace.extracted+0x476c> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |