Loop Id: 109 | Module: exec | Source: advec_cell.cpp:159-202 [...] | Coverage: 0.01% |
---|
Loop Id: 109 | Module: exec | Source: advec_cell.cpp:159-202 [...] | Coverage: 0.01% |
---|
0x41ad20 CMP %R10D,%R13D |
0x41ad23 CMOVBE %R13D,%R10D |
0x41ad27 MOV 0x2f8(%RSP),%R13D |
0x41ad2f MOV %R10D,%R12D |
0x41ad32 LEA (%R13,%R10,1),%EAX |
0x41ad37 LEA 0x3(%RCX),%R10D |
0x41ad3b MOV %EAX,0x2e4(%RSP) |
0x41ad42 MOV %R10D,0x2cc(%RSP) |
0x41ad4a CMP %EAX,%R13D |
0x41ad4d JAE 41b750 |
0x41ad53 MOV 0x30(%RSP),%RDI |
0x41ad58 MOV 0x38(%RSP),%R14 |
0x41ad5d MOV 0x28(%RSP),%R9 |
0x41ad62 MOV 0x18(%RSP),%RAX |
0x41ad67 MOV (%RDI),%RBX |
0x41ad6a MOV 0x10(%RDI),%R10 |
0x41ad6e MOV 0x10(%RSP),%RDI |
0x41ad73 MOV 0x10(%R14),%RSI |
0x41ad77 MOV (%R14),%R8 |
0x41ad7a MOV 0x8(%R9),%R14 |
0x41ad7e MOV %RBX,0x2d8(%RSP) |
0x41ad86 MOV 0x10(%RDI),%R9 |
0x41ad8a MOV 0x2e8(%RSP),%R15 |
0x41ad92 MOV %RSI,0x2c0(%RSP) |
0x41ad9a MOV 0x10(%RAX),%RSI |
0x41ad9e MOV (%RAX),%RBX |
0x41ada1 MOV %R9,0x1c0(%RSP) |
0x41ada9 MOV (%RDI),%R9 |
0x41adac LEA 0x1(%R15),%R11 |
0x41adb0 MOV 0x8(%RSP),%RDI |
0x41adb5 MOV %RSI,0x78(%RSP) |
0x41adba IMUL %R11,%R8 |
0x41adbe LEA (%R14,%R11,8),%R15 |
0x41adc2 IMUL %R11,%RBX |
0x41adc6 MOV 0x20(%RSP),%R13 |
0x41adcb MOV %R11,0x2f0(%RSP) |
0x41add3 MOV (%RDI),%RSI |
0x41add6 MOV 0x10(%RDI),%RAX |
0x41adda MOV 0x10(%R13),%RDX |
0x41adde MOV (%R13),%R13 |
0x41ade2 MOV %R8,0x68(%RSP) |
0x41ade7 IMUL %R11,%RSI |
0x41adeb MOV %RAX,0x70(%RSP) |
0x41adf0 LEA 0x1(%RCX),%EAX |
0x41adf3 MOVSXD %ECX,%RCX |
0x41adf6 MOV %EAX,0x2c8(%RSP) |
0x41adfd MOVSXD %EAX,%RDI |
0x41ae00 MOV %RCX,0x2d0(%RSP) |
0x41ae08 MOV %RBX,0x60(%RSP) |
0x41ae0d MOV %RSI,%R11 |
0x41ae10 MOV %RSI,0x58(%RSP) |
0x41ae15 MOV 0x48(%RSP),%ESI |
0x41ae19 MOVSXD 0x2cc(%RSP),%RCX |
0x41ae21 ADD $0x2,%ESI |
0x41ae24 MOVSXD %ESI,%RAX |
0x41ae27 LEA -0x1(%R12),%ESI |
0x41ae2c CMP %ECX,%EAX |
0x41ae2e MOV %EAX,0x2e0(%RSP) |
0x41ae35 CMOVLE %RAX,%RCX |
0x41ae39 CMP $0xe,%ESI |
0x41ae3c JBE 41b433 |
0x41ae42 MOVSXD 0x2fc(%RSP),%RAX |
0x41ae4a MOV 0x2c0(%RSP),%RSI |
0x41ae52 VPBROADCASTQ %RDI,%ZMM29 |
0x41ae58 VPBROADCASTQ %RCX,%ZMM22 |
0x41ae5e KXNORB %K1,%K1,%K1 |
0x41ae62 VPBROADCASTD 0x2fc(%RSP),%ZMM0 |
0x41ae6d MOV 0x78(%RSP),%RDI |
0x41ae72 VPBROADCASTQ %R13,%ZMM27 |
0x41ae78 VPBROADCASTQ %R9,%ZMM30 |
0x41ae7e LEA (%RBX,%RAX,1),%RBX |
0x41ae82 ADD %RAX,%R8 |
0x41ae85 ADD %R11,%RAX |
0x41ae88 MOV 0x70(%RSP),%R11 |
0x41ae8d VPADDD 0x31769(%RIP),%ZMM0,%ZMM7 |
0x41ae97 VPBROADCASTQ 0x2d0(%RSP),%ZMM1 |
0x41ae9f LEA (%RSI,%R8,8),%R8 |
0x41aea3 MOV $0x10,%ESI |
0x41aea8 LEA (%R11,%RAX,8),%RCX |
0x41aeac VPBROADCASTQ 0x2d8(%RSP),%ZMM2 |
0x41aeb4 MOV %R12D,%R11D |
0x41aeb7 LEA (%RDI,%RBX,8),%RDI |
0x41aebb SHR $0x4,%R11D |
0x41aebf VPBROADCASTD %ESI,%ZMM3 |
0x41aec5 VPBROADCASTQ 0x2f0(%RSP),%ZMM25 |
0x41aecd VBROADCASTSD 0x318a9(%RIP),%ZMM28 |
0x41aed7 VBROADCASTSD 0x318b7(%RIP),%ZMM31 |
0x41aee1 MOV 0x1c0(%RSP),%RBX |
0x41aee9 VMOVDQA32 %ZMM7,0x280(%RSP) |
0x41aef1 SAL $0x7,%R11 |
0x41aef5 VMOVDQA64 %ZMM1,0x100(%RSP) |
0x41aefd XOR %EAX,%EAX |
0x41aeff VXORPD %XMM26,%XMM26,%XMM26 |
0x41af05 VMOVDQA64 %ZMM2,0xc0(%RSP) |
0x41af0d VMOVDQA32 %ZMM3,0x80(%RSP) |
0x41af15 NOPL (%RAX) |
(111) 0x41af18 VMOVUPD (%R8,%RAX,1),%ZMM9 |
(111) 0x41af1f VMOVDQA64 0xc0(%RSP),%ZMM0 |
(111) 0x41af27 KMOVB %K1,%K4 |
(111) 0x41af2b KMOVB %K1,%K6 |
(111) 0x41af2f KMOVB %K1,%K7 |
(111) 0x41af33 KMOVB %K1,%K5 |
(111) 0x41af37 VMOVUPD 0x40(%R8,%RAX,1),%ZMM10 |
(111) 0x41af3f VMOVDQA32 0x280(%RSP),%ZMM6 |
(111) 0x41af47 VCMPPD $0xe,%ZMM26,%ZMM9,%K3 |
(111) 0x41af4e VMOVDQA64 0x100(%RSP),%ZMM15 |
(111) 0x41af56 VCMPPD $0xe,%ZMM26,%ZMM10,%K2 |
(111) 0x41af5d VPADDD 0x80(%RSP),%ZMM6,%ZMM5 |
(111) 0x41af65 VPMOVSXDQ %YMM6,%ZMM7 |
(111) 0x41af6b VBROADCASTSD (%R15),%ZMM10 |
(111) 0x41af71 VEXTRACTI32X8 $0x1,%ZMM6,%YMM8 |
(111) 0x41af78 VPBLENDMQ %ZMM29,%ZMM25,%ZMM13{%K3} |
(111) 0x41af7e VMOVDQA32 %ZMM5,0x280(%RSP) |
(111) 0x41af86 VPBLENDMQ %ZMM29,%ZMM22,%ZMM11{%K3} |
(111) 0x41af8c VANDPD (%R8,%RAX,1),%ZMM28,%ZMM5 |
(111) 0x41af93 VXORPS %XMM20,%XMM20,%XMM20 |
(111) 0x41af99 VPMULLQ %ZMM0,%ZMM13,%ZMM20 |
(111) 0x41af9f VPBLENDMQ %ZMM29,%ZMM25,%ZMM12{%K2} |
(111) 0x41afa5 VPBLENDMQ %ZMM29,%ZMM22,%ZMM4{%K2} |
(111) 0x41afab VXORPS %XMM3,%XMM3,%XMM3 |
(111) 0x41afaf VPMULLQ %ZMM0,%ZMM12,%ZMM3 |
(111) 0x41afb5 VGATHERQPD (%R14,%ZMM11,8),%ZMM0{%K6} |
(111) 0x41afbc VGATHERQPD (%R14,%ZMM4,8),%ZMM11{%K7} |
(111) 0x41afc3 KMOVB %K1,%K6 |
(111) 0x41afc7 KMOVB %K1,%K7 |
(111) 0x41afcb VPMOVSXDQ %YMM8,%ZMM2 |
(111) 0x41afd1 VPBLENDMQ %ZMM15,%ZMM22,%ZMM16{%K3} |
(111) 0x41afd7 VPBLENDMQ %ZMM15,%ZMM22,%ZMM15{%K2} |
(111) 0x41afdd VANDPD 0x40(%R8,%RAX,1),%ZMM28,%ZMM8 |
(111) 0x41afe5 VPBLENDMQ %ZMM25,%ZMM29,%ZMM14{%K3} |
(111) 0x41afeb VPBLENDMQ %ZMM25,%ZMM29,%ZMM18{%K2} |
(111) 0x41aff1 KMOVB %K1,%K3 |
(111) 0x41aff5 KMOVB %K1,%K2 |
(111) 0x41aff9 VXORPS %XMM21,%XMM21,%XMM21 |
(111) 0x41afff VPMULLQ %ZMM27,%ZMM12,%ZMM21 |
(111) 0x41b005 VXORPS %XMM17,%XMM17,%XMM17 |
(111) 0x41b00b VPMULLQ %ZMM18,%ZMM27,%ZMM17 |
(111) 0x41b011 VPADDQ %ZMM7,%ZMM20,%ZMM20 |
(111) 0x41b017 VPMULLQ %ZMM30,%ZMM12,%ZMM12 |
(111) 0x41b01d VGATHERQPD (%R10,%ZMM20,8),%ZMM1{%K4} |
(111) 0x41b024 VPADDQ %ZMM2,%ZMM3,%ZMM23 |
(111) 0x41b02a KMOVB %K1,%K4 |
(111) 0x41b02e VDIVPD %ZMM1,%ZMM5,%ZMM9 |
(111) 0x41b034 VDIVPD %ZMM0,%ZMM10,%ZMM1 |
(111) 0x41b03a VDIVPD %ZMM11,%ZMM10,%ZMM5 |
(111) 0x41b040 VXORPS %XMM10,%XMM10,%XMM10 |
(111) 0x41b045 VPMULLQ %ZMM27,%ZMM13,%ZMM10 |
(111) 0x41b04b VADDPD %ZMM31,%ZMM9,%ZMM4 |
(111) 0x41b051 VGATHERQPD (%R10,%ZMM23,8),%ZMM6{%K5} |
(111) 0x41b058 VXORPS %XMM11,%XMM11,%XMM11 |
(111) 0x41b05d VPMULLQ %ZMM27,%ZMM16,%ZMM11 |
(111) 0x41b063 VDIVPD %ZMM6,%ZMM8,%ZMM8 |
(111) 0x41b069 KMOVB %K1,%K5 |
(111) 0x41b06d VPMULLQ %ZMM30,%ZMM13,%ZMM13 |
(111) 0x41b073 VADDPD %ZMM31,%ZMM8,%ZMM6 |
(111) 0x41b079 VPADDQ %ZMM2,%ZMM21,%ZMM21 |
(111) 0x41b07f VPMULLQ %ZMM30,%ZMM16,%ZMM16 |
(111) 0x41b085 VMULPD %ZMM1,%ZMM4,%ZMM3 |
(111) 0x41b08b VMULPD %ZMM5,%ZMM6,%ZMM0 |
(111) 0x41b091 VBROADCASTSD 0x316f5(%RIP),%ZMM5 |
(111) 0x41b09b VPADDQ %ZMM7,%ZMM10,%ZMM24 |
(111) 0x41b0a1 VXORPS %XMM10,%XMM10,%XMM10 |
(111) 0x41b0a6 VPMULLQ %ZMM27,%ZMM15,%ZMM10 |
(111) 0x41b0ac VSUBPD %ZMM9,%ZMM5,%ZMM1 |
(111) 0x41b0b2 VSUBPD %ZMM8,%ZMM5,%ZMM19 |
(111) 0x41b0b8 VPMULLQ %ZMM30,%ZMM15,%ZMM15 |
(111) 0x41b0be VSUBPD %ZMM8,%ZMM31,%ZMM8 |
(111) 0x41b0c4 VSUBPD %ZMM9,%ZMM31,%ZMM9 |
(111) 0x41b0ca VMOVAPD %ZMM3,0x300(%RSP) |
(111) 0x41b0d2 VPADDQ %ZMM7,%ZMM11,%ZMM3 |
(111) 0x41b0d8 VGATHERQPD (%RDX,%ZMM3,8),%ZMM4{%K4} |
(111) 0x41b0df VGATHERQPD (%RDX,%ZMM24,8),%ZMM6{%K3} |
(111) 0x41b0e6 VGATHERQPD (%RDX,%ZMM21,8),%ZMM5{%K2} |
(111) 0x41b0ed VPADDQ %ZMM2,%ZMM10,%ZMM11 |
(111) 0x41b0f3 VXORPS %XMM10,%XMM10,%XMM10 |
(111) 0x41b0f8 VPMULLQ %ZMM27,%ZMM14,%ZMM10 |
(111) 0x41b0fe VSUBPD %ZMM4,%ZMM6,%ZMM4 |
(111) 0x41b104 VGATHERQPD (%RDX,%ZMM11,8),%ZMM3{%K5} |
(111) 0x41b10b VPMULLQ %ZMM30,%ZMM14,%ZMM14 |
(111) 0x41b111 VSUBPD %ZMM3,%ZMM5,%ZMM3 |
(111) 0x41b117 VPADDQ %ZMM7,%ZMM10,%ZMM11 |
(111) 0x41b11d VGATHERQPD (%RDX,%ZMM11,8),%ZMM10{%K6} |
(111) 0x41b124 VMOVDQA64 %ZMM18,0x240(%RSP) |
(111) 0x41b12c VPADDQ %ZMM2,%ZMM17,%ZMM18 |
(111) 0x41b132 KMOVB %K1,%K6 |
(111) 0x41b136 VGATHERQPD (%RDX,%ZMM18,8),%ZMM11{%K7} |
(111) 0x41b13d VSUBPD %ZMM6,%ZMM10,%ZMM10 |
(111) 0x41b143 KMOVB %K1,%K7 |
(111) 0x41b147 VSUBPD %ZMM5,%ZMM11,%ZMM11 |
(111) 0x41b14d VCMPPD $0x2,%ZMM26,%ZMM10,%K3 |
(111) 0x41b154 VCMPPD $0x2,%ZMM26,%ZMM11,%K2 |
(111) 0x41b15b VBLENDMPD 0x314db(%RIP),%ZMM31,%ZMM17{%K3} |
(111) 0x41b165 KMOVB %K1,%K3 |
(111) 0x41b169 VBLENDMPD 0x314cd(%RIP),%ZMM31,%ZMM18{%K2} |
(111) 0x41b173 VMOVAPD %ZMM17,0x200(%RSP) |
(111) 0x41b17b KMOVB %K1,%K2 |
(111) 0x41b17f VMULPD %ZMM10,%ZMM4,%ZMM17 |
(111) 0x41b185 VMOVAPD %ZMM18,0x1c0(%RSP) |
(111) 0x41b18d VANDPD %ZMM28,%ZMM10,%ZMM10 |
(111) 0x41b193 VANDPD %ZMM28,%ZMM4,%ZMM4 |
(111) 0x41b199 VMULPD %ZMM11,%ZMM3,%ZMM18 |
(111) 0x41b19f VANDPD %ZMM28,%ZMM11,%ZMM11 |
(111) 0x41b1a5 VANDPD %ZMM28,%ZMM3,%ZMM3 |
(111) 0x41b1ab VMULPD 0x1c0(%RSP),%ZMM8,%ZMM8 |
(111) 0x41b1b3 VMOVAPD %ZMM17,0x180(%RSP) |
(111) 0x41b1bb VMULPD %ZMM10,%ZMM1,%ZMM17 |
(111) 0x41b1c1 VMOVAPD %ZMM18,0x140(%RSP) |
(111) 0x41b1c9 VMULPD %ZMM11,%ZMM19,%ZMM18 |
(111) 0x41b1cf VFMADD231PD 0x300(%RSP),%ZMM4,%ZMM17 |
(111) 0x41b1d7 VMINPD %ZMM10,%ZMM4,%ZMM4 |
(111) 0x41b1dd VFMADD231PD %ZMM3,%ZMM0,%ZMM18 |
(111) 0x41b1e3 VMINPD %ZMM11,%ZMM3,%ZMM3 |
(111) 0x41b1e9 VMULPD 0x3148d(%RIP),%ZMM18,%ZMM18 |
(111) 0x41b1f3 VMOVAPD 0x180(%RSP),%ZMM11 |
(111) 0x41b1fb VMULPD 0x3147b(%RIP),%ZMM17,%ZMM17 |
(111) 0x41b205 VCMPPD $0xe,%ZMM26,%ZMM11,%K4 |
(111) 0x41b20c VMULPD 0x200(%RSP),%ZMM9,%ZMM11 |
(111) 0x41b214 VMOVAPD 0x140(%RSP),%ZMM9 |
(111) 0x41b21c VMINPD %ZMM3,%ZMM18,%ZMM3 |
(111) 0x41b222 VCMPPD $0xe,%ZMM26,%ZMM9,%K5 |
(111) 0x41b229 VMINPD %ZMM4,%ZMM17,%ZMM10 |
(111) 0x41b22f VFMADD231PD %ZMM3,%ZMM8,%ZMM5{%K5} |
(111) 0x41b235 VPADDQ %ZMM7,%ZMM13,%ZMM3 |
(111) 0x41b23b KMOVB %K1,%K5 |
(111) 0x41b23f VFMADD231PD %ZMM10,%ZMM11,%ZMM6{%K4} |
(111) 0x41b245 VPADDQ %ZMM2,%ZMM12,%ZMM11 |
(111) 0x41b24b VPADDQ %ZMM2,%ZMM15,%ZMM12 |
(111) 0x41b251 KMOVB %K1,%K4 |
(111) 0x41b255 VMULPD 0x40(%R8,%RAX,1),%ZMM5,%ZMM5 |
(111) 0x41b25d VMULPD (%R8,%RAX,1),%ZMM6,%ZMM6 |
(111) 0x41b264 VMOVUPD %ZMM5,0x40(%RDI,%RAX,1) |
(111) 0x41b26c VMOVUPD %ZMM6,(%RDI,%RAX,1) |
(111) 0x41b273 VGATHERQPD (%RBX,%ZMM11,8),%ZMM13{%K5} |
(111) 0x41b27a VGATHERQPD (%R10,%ZMM23,8),%ZMM10{%K2} |
(111) 0x41b281 VGATHERQPD (%RDX,%ZMM21,8),%ZMM8{%K7} |
(111) 0x41b288 KMOVB %K1,%K7 |
(111) 0x41b28c KMOVB %K1,%K2 |
(111) 0x41b290 VGATHERQPD (%RBX,%ZMM12,8),%ZMM11{%K7} |
(111) 0x41b297 VPADDQ %ZMM7,%ZMM16,%ZMM23 |
(111) 0x41b29d VXORPS %XMM12,%XMM12,%XMM12 |
(111) 0x41b2a2 VPMULLQ 0x240(%RSP),%ZMM30,%ZMM12 |
(111) 0x41b2aa VPADDQ %ZMM7,%ZMM14,%ZMM7 |
(111) 0x41b2b0 VPADDQ %ZMM2,%ZMM12,%ZMM2 |
(111) 0x41b2b6 VGATHERQPD (%R10,%ZMM20,8),%ZMM9{%K3} |
(111) 0x41b2bd VGATHERQPD (%RDX,%ZMM24,8),%ZMM4{%K6} |
(111) 0x41b2c4 KMOVB %K1,%K3 |
(111) 0x41b2c8 KMOVB %K1,%K6 |
(111) 0x41b2cc VGATHERQPD (%RBX,%ZMM3,8),%ZMM20{%K4} |
(111) 0x41b2d3 VGATHERQPD (%RBX,%ZMM7,8),%ZMM15{%K3} |
(111) 0x41b2da VGATHERQPD (%RBX,%ZMM2,8),%ZMM14{%K2} |
(111) 0x41b2e1 VGATHERQPD (%RBX,%ZMM23,8),%ZMM3{%K6} |
(111) 0x41b2e8 VSUBPD %ZMM13,%ZMM14,%ZMM7 |
(111) 0x41b2ee VSUBPD %ZMM20,%ZMM15,%ZMM2 |
(111) 0x41b2f4 VSUBPD %ZMM3,%ZMM20,%ZMM3 |
(111) 0x41b2fa VSUBPD %ZMM11,%ZMM13,%ZMM11 |
(111) 0x41b300 VMULPD %ZMM9,%ZMM4,%ZMM4 |
(111) 0x41b306 VMULPD %ZMM10,%ZMM8,%ZMM8 |
(111) 0x41b30c VCMPPD $0x2,%ZMM26,%ZMM2,%K4 |
(111) 0x41b313 VCMPPD $0x2,%ZMM26,%ZMM7,%K5 |
(111) 0x41b31a VMULPD %ZMM2,%ZMM3,%ZMM15 |
(111) 0x41b320 VANDPD %ZMM28,%ZMM2,%ZMM2 |
(111) 0x41b326 VANDPD %ZMM28,%ZMM3,%ZMM3 |
(111) 0x41b32c VMULPD %ZMM7,%ZMM11,%ZMM12 |
(111) 0x41b332 VANDPD %ZMM28,%ZMM7,%ZMM7 |
(111) 0x41b338 VANDPD %ZMM28,%ZMM11,%ZMM11 |
(111) 0x41b33e VMULPD %ZMM7,%ZMM19,%ZMM19 |
(111) 0x41b344 VBLENDMPD 0x312f2(%RIP),%ZMM31,%ZMM14{%K4} |
(111) 0x41b34e VBLENDMPD 0x312e8(%RIP),%ZMM31,%ZMM24{%K5} |
(111) 0x41b358 VMULPD %ZMM2,%ZMM1,%ZMM1 |
(111) 0x41b35e VCMPPD $0xe,%ZMM26,%ZMM15,%K6 |
(111) 0x41b365 VCMPPD $0xe,%ZMM26,%ZMM12,%K7 |
(111) 0x41b36c VANDPD %ZMM28,%ZMM5,%ZMM12 |
(111) 0x41b372 VFMADD132PD %ZMM11,%ZMM19,%ZMM0 |
(111) 0x41b378 VMINPD %ZMM7,%ZMM11,%ZMM11 |
(111) 0x41b37e VDIVPD %ZMM8,%ZMM12,%ZMM10 |
(111) 0x41b384 VFMADD231PD 0x300(%RSP),%ZMM3,%ZMM1 |
(111) 0x41b38c VMINPD %ZMM2,%ZMM3,%ZMM3 |
(111) 0x41b392 VMULPD 0x312e4(%RIP),%ZMM0,%ZMM0 |
(111) 0x41b39c VMULPD 0x312da(%RIP),%ZMM1,%ZMM15 |
(111) 0x41b3a6 VANDPD %ZMM28,%ZMM6,%ZMM1 |
(111) 0x41b3ac VDIVPD %ZMM4,%ZMM1,%ZMM9 |
(111) 0x41b3b2 VMINPD %ZMM11,%ZMM0,%ZMM7 |
(111) 0x41b3b8 VSUBPD %ZMM9,%ZMM31,%ZMM11 |
(111) 0x41b3be VMINPD %ZMM3,%ZMM15,%ZMM2 |
(111) 0x41b3c4 VMULPD %ZMM24,%ZMM7,%ZMM0 |
(111) 0x41b3ca VSUBPD %ZMM10,%ZMM31,%ZMM7 |
(111) 0x41b3d0 VMULPD %ZMM14,%ZMM2,%ZMM14 |
(111) 0x41b3d6 VFMADD231PD %ZMM7,%ZMM0,%ZMM13{%K7} |
(111) 0x41b3dc VFMADD231PD %ZMM11,%ZMM14,%ZMM20{%K6} |
(111) 0x41b3e2 VMULPD %ZMM6,%ZMM20,%ZMM21 |
(111) 0x41b3e8 VMULPD %ZMM5,%ZMM13,%ZMM6 |
(111) 0x41b3ee VMOVUPD %ZMM21,(%RCX,%RAX,1) |
(111) 0x41b3f5 VMOVUPD %ZMM6,0x40(%RCX,%RAX,1) |
(111) 0x41b3fd SUB $-0x80,%RAX |
(111) 0x41b401 CMP %R11,%RAX |
(111) 0x41b404 JNE 41af18 |
0x41b40a MOV %R12D,%R8D |
0x41b40d MOV %RBX,0x1c0(%RSP) |
0x41b415 AND $-0x10,%R8D |
0x41b419 ADD %R8D,0x2f8(%RSP) |
0x41b421 ADD %R8D,0x2fc(%RSP) |
0x41b429 AND $0xf,%R12D |
0x41b42d JE 41b6f0 |
0x41b433 MOV 0x2c0(%RSP),%R12 |
0x41b43b MOV 0x68(%RSP),%RDI |
0x41b440 VXORPD %XMM7,%XMM7,%XMM7 |
0x41b444 MOV 0x78(%RSP),%R11 |
0x41b449 MOV 0x60(%RSP),%RSI |
0x41b44e LEA (%R12,%RDI,8),%RCX |
0x41b452 MOV 0x70(%RSP),%R8 |
0x41b457 MOV 0x58(%RSP),%R12 |
0x41b45c LEA (%R11,%RSI,8),%RBX |
0x41b460 MOV 0x2cc(%RSP),%R11D |
0x41b468 MOVSXD 0x2fc(%RSP),%RAX |
0x41b470 MOV %RCX,0x280(%RSP) |
0x41b478 MOV 0x2e0(%RSP),%ECX |
0x41b47f LEA (%R8,%R12,8),%RDI |
0x41b483 VMOVSD 0x311b5(%RIP),%XMM8 |
0x41b48b MOV %RBX,0x240(%RSP) |
0x41b493 MOV %RDI,0x300(%RSP) |
0x41b49b CMP %R11D,%ECX |
0x41b49e CMOVG %R11D,%ECX |
0x41b4a2 MOVSXD %ECX,%RSI |
0x41b4a5 MOV %RSI,0x200(%RSP) |
0x41b4ad MOV 0x1c0(%RSP),%RSI |
0x41b4b5 JMP 41b69b |
(110) 0x41b4c0 MOV 0x2e8(%RSP),%RCX |
(110) 0x41b4c8 MOV 0x2d0(%RSP),%R8 |
(110) 0x41b4d0 MOV 0x2f0(%RSP),%RDI |
(110) 0x41b4d8 MOV %RCX,%R12 |
(110) 0x41b4db MOV 0x2d8(%RSP),%R11 |
(110) 0x41b4e3 VMOVSD (%R15),%XMM15 |
(110) 0x41b4e8 VANDPD 0x31290(%RIP),%XMM13,%XMM5 |
(110) 0x41b4f0 VMOVSD 0x31298(%RIP),%XMM1 |
(110) 0x41b4f8 IMUL %RCX,%R11 |
(110) 0x41b4fc VDIVSD (%R14,%R12,8),%XMM15,%XMM3 |
(110) 0x41b502 MOV %R13,%R12 |
(110) 0x41b505 IMUL %RCX,%R12 |
(110) 0x41b509 ADD %RAX,%R11 |
(110) 0x41b50c LEA (%R10,%R11,8),%RBX |
(110) 0x41b510 MOV %R13,%R11 |
(110) 0x41b513 ADD %RAX,%R12 |
(110) 0x41b516 IMUL %R8,%R11 |
(110) 0x41b51a LEA (%RDX,%R12,8),%R12 |
(110) 0x41b51e VDIVSD (%RBX),%XMM5,%XMM0 |
(110) 0x41b522 VMOVSD 0x3126e(%RIP),%XMM5 |
(110) 0x41b52a VMOVSD (%R12),%XMM11 |
(110) 0x41b530 VADDSD 0x31260(%RIP),%XMM0,%XMM2 |
(110) 0x41b538 VSUBSD %XMM0,%XMM1,%XMM9 |
(110) 0x41b53c ADD %RAX,%R11 |
(110) 0x41b53f VSUBSD (%RDX,%R11,8),%XMM11,%XMM4 |
(110) 0x41b545 VMULSD %XMM2,%XMM3,%XMM14 |
(110) 0x41b549 MOV %R13,%R11 |
(110) 0x41b54c IMUL %RDI,%R11 |
(110) 0x41b550 ADD %RAX,%R11 |
(110) 0x41b553 VMOVSD (%RDX,%R11,8),%XMM12 |
(110) 0x41b559 VSUBSD %XMM11,%XMM12,%XMM6 |
(110) 0x41b55e VMULSD %XMM6,%XMM4,%XMM3 |
(110) 0x41b562 VCMPSD $0x6,%XMM7,%XMM6,%XMM10 |
(110) 0x41b567 VBLENDVPD %XMM10,%XMM5,%XMM8,%XMM15 |
(110) 0x41b56d VCOMISD %XMM7,%XMM3 |
(110) 0x41b571 JBE 41b5b2 |
(110) 0x41b573 VANDPD 0x31205(%RIP),%XMM6,%XMM1 |
(110) 0x41b57b VANDPD 0x311fd(%RIP),%XMM4,%XMM2 |
(110) 0x41b583 VMOVSD 0x3120d(%RIP),%XMM5 |
(110) 0x41b58b VMULSD %XMM9,%XMM1,%XMM4 |
(110) 0x41b590 VMINSD %XMM1,%XMM2,%XMM6 |
(110) 0x41b594 VSUBSD %XMM0,%XMM5,%XMM0 |
(110) 0x41b598 VFMADD231SD %XMM14,%XMM2,%XMM4 |
(110) 0x41b59d VMULSD 0x310db(%RIP),%XMM4,%XMM12 |
(110) 0x41b5a5 VMINSD %XMM6,%XMM12,%XMM10 |
(110) 0x41b5a9 VMULSD %XMM0,%XMM10,%XMM3 |
(110) 0x41b5ad VFMADD231SD %XMM3,%XMM15,%XMM11 |
(110) 0x41b5b2 VMULSD %XMM11,%XMM13,%XMM13 |
(110) 0x41b5b7 IMUL %R9,%RCX |
(110) 0x41b5bb MOV 0x240(%RSP),%R11 |
(110) 0x41b5c3 VMOVSD 0x311cd(%RIP),%XMM10 |
(110) 0x41b5cb IMUL %R9,%RDI |
(110) 0x41b5cf IMUL %R9,%R8 |
(110) 0x41b5d3 ADD %RAX,%RCX |
(110) 0x41b5d6 ADD %RAX,%RDI |
(110) 0x41b5d9 VMOVSD %XMM13,(%R11,%RAX,8) |
(110) 0x41b5df ADD %RAX,%R8 |
(110) 0x41b5e2 VMOVSD (%RSI,%RCX,8),%XMM2 |
(110) 0x41b5e7 VMOVSD (%RSI,%RDI,8),%XMM1 |
(110) 0x41b5ec VMOVSD (%R12),%XMM15 |
(110) 0x41b5f2 VMOVSD (%RBX),%XMM11 |
(110) 0x41b5f6 VSUBSD %XMM2,%XMM1,%XMM12 |
(110) 0x41b5fa VSUBSD (%RSI,%R8,8),%XMM2,%XMM4 |
(110) 0x41b600 VMULSD %XMM12,%XMM4,%XMM5 |
(110) 0x41b605 VCMPSD $0x6,%XMM7,%XMM12,%XMM6 |
(110) 0x41b60a VBLENDVPD %XMM6,%XMM10,%XMM8,%XMM0 |
(110) 0x41b610 VCOMISD %XMM7,%XMM5 |
(110) 0x41b614 JBE 41b668 |
(110) 0x41b616 VMULSD %XMM11,%XMM15,%XMM3 |
(110) 0x41b61b VANDPD 0x3115d(%RIP),%XMM12,%XMM1 |
(110) 0x41b623 VANDPD 0x31155(%RIP),%XMM4,%XMM4 |
(110) 0x41b62b VMOVSD 0x31165(%RIP),%XMM11 |
(110) 0x41b633 VMULSD %XMM9,%XMM1,%XMM9 |
(110) 0x41b638 VMINSD %XMM1,%XMM4,%XMM10 |
(110) 0x41b63c VFMADD231SD %XMM14,%XMM4,%XMM9 |
(110) 0x41b641 VANDPD 0x31137(%RIP),%XMM13,%XMM14 |
(110) 0x41b649 VDIVSD %XMM3,%XMM14,%XMM15 |
(110) 0x41b64d VMULSD 0x3102b(%RIP),%XMM9,%XMM6 |
(110) 0x41b655 VMINSD %XMM10,%XMM6,%XMM5 |
(110) 0x41b65a VSUBSD %XMM15,%XMM11,%XMM12 |
(110) 0x41b65f VMULSD %XMM5,%XMM12,%XMM4 |
(110) 0x41b663 VFMADD231SD %XMM4,%XMM0,%XMM2 |
(110) 0x41b668 VMULSD %XMM13,%XMM2,%XMM13 |
(110) 0x41b66d MOV 0x300(%RSP),%R8 |
(110) 0x41b675 MOV 0x2f8(%RSP),%ECX |
(110) 0x41b67c MOV 0x2fc(%RSP),%EDI |
(110) 0x41b683 MOV 0x2e4(%RSP),%EBX |
(110) 0x41b68a SUB %EDI,%ECX |
(110) 0x41b68c VMOVSD %XMM13,(%R8,%RAX,8) |
(110) 0x41b692 INC %RAX |
(110) 0x41b695 ADD %EAX,%ECX |
(110) 0x41b697 CMP %EBX,%ECX |
(110) 0x41b699 JAE 41b6f0 |
(110) 0x41b69b MOV 0x280(%RSP),%RBX |
(110) 0x41b6a3 VMOVSD (%RBX,%RAX,8),%XMM13 |
(110) 0x41b6a8 VCOMISD %XMM7,%XMM13 |
(110) 0x41b6ac JA 41b4c0 |
(110) 0x41b6b2 MOV 0x200(%RSP),%R8 |
(110) 0x41b6ba MOV 0x2e8(%RSP),%RDI |
(110) 0x41b6c2 MOV 0x2f0(%RSP),%RCX |
(110) 0x41b6ca MOV %R8,%R12 |
(110) 0x41b6cd JMP 41b4db |
0x41b6f0 MOV 0x2e4(%RSP),%R12D |
0x41b6f8 MOV %R12D,0x2f8(%RSP) |
0x41b700 MOV 0x2f0(%RSP),%RAX |
0x41b708 MOV 0x2cc(%RSP),%EDX |
0x41b70f MOV 0x2c8(%RSP),%ECX |
0x41b716 MOV %RAX,0x2e8(%RSP) |
0x41b71e CMP %EDX,0x50(%RSP) |
0x41b722 JLE 41b6d8 |
0x41b724 MOV 0x44(%RSP),%R13D |
0x41b729 MOV 0x2f8(%RSP),%ESI |
0x41b730 MOV 0x4c(%RSP),%R9D |
0x41b735 MOV 0x54(%RSP),%R10D |
0x41b73a SUB %ESI,%R13D |
0x41b73d MOV %R9D,0x2fc(%RSP) |
0x41b745 JMP 41ad20 |
0x41b750 MOV 0x2e8(%RSP),%R11 |
0x41b758 LEA 0x1(%RCX),%EDX |
0x41b75b MOV %EDX,0x2c8(%RSP) |
0x41b762 INC %R11 |
0x41b765 MOV %R11,0x2f0(%RSP) |
0x41b76d JMP 41b700 |
/software/compilers/gcc/gcc-13.1.0-full+isl+binutils/include/c++/13.1.0/bits/stl_algobase.h: 238 - 238 |
-------------------------------------------------------------------------------- |
238: if (__b < __a) |
/scratch_na/users/xoserete/qaas_runs/171-320-5323/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/context.h: 46 - 69 |
-------------------------------------------------------------------------------- |
46: T &operator[](size_t i) const { return data[i]; } |
[...] |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
/scratch_na/users/xoserete/qaas_runs/171-320-5323/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/advec_cell.cpp: 159 - 202 |
-------------------------------------------------------------------------------- |
159: for (int i = (x_min + 1); i < (x_max + 2); i++) |
160: ({ |
161: int upwind, donor, downwind, dif; |
162: double sigmat, sigma3, sigma4, sigmav, sigmam, diffuw, diffdw, limiter, wind; |
163: if (vol_flux_y(i, j) > 0.0) { |
[...] |
169: upwind = std::min(j + 1, y_max + 2); |
170: donor = j; |
171: downwind = j - 1; |
172: dif = upwind; |
173: } |
174: sigmat = std::fabs(vol_flux_y(i, j)) / pre_vol(i, donor); |
175: sigma3 = (1.0 + sigmat) * (vertexdy[j] / vertexdy[dif]); |
176: sigma4 = 2.0 - sigmat; |
177: sigmav = sigmat; |
178: diffuw = density1(i, donor) - density1(i, upwind); |
179: diffdw = density1(i, downwind) - density1(i, donor); |
180: wind = 1.0; |
181: if (diffdw <= 0.0) wind = -1.0; |
182: if (diffuw * diffdw > 0.0) { |
183: limiter = (1.0 - sigmav) * wind * |
184: std::fmin(std::fmin(std::fabs(diffuw), std::fabs(diffdw)), |
185: one_by_six * (sigma3 * std::fabs(diffuw) + sigma4 * std::fabs(diffdw))); |
186: } else { |
187: limiter = 0.0; |
188: } |
189: mass_flux_y(i, j) = vol_flux_y(i, j) * (density1(i, donor) + limiter); |
190: sigmam = std::fabs(mass_flux_y(i, j)) / (density1(i, donor) * pre_vol(i, donor)); |
191: diffuw = energy1(i, donor) - energy1(i, upwind); |
192: diffdw = energy1(i, downwind) - energy1(i, donor); |
193: wind = 1.0; |
194: if (diffdw <= 0.0) wind = -1.0; |
195: if (diffuw * diffdw > 0.0) { |
196: limiter = (1.0 - sigmam) * wind * |
197: std::fmin(std::fmin(std::fabs(diffuw), std::fabs(diffdw)), |
198: one_by_six * (sigma3 * std::fabs(diffuw) + sigma4 * std::fabs(diffdw))); |
199: } else { |
200: limiter = 0.0; |
201: } |
202: ener_flux(i, j) = mass_flux_y(i, j) * (energy1(i, donor) + limiter); |
Path / |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 4.05 |
CQA speedup if FP arith vectorized | 1.00 |
CQA speedup if fully vectorized | 1.39 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.27 |
Bottlenecks | micro-operation queue, |
Function | advec_cell_kernel(int, int, int, int, int, int, clover::Buffer1D |
Source | stl_algobase.h:238-238,context.h:46-46,context.h:69-69,advec_cell.cpp:159-159,advec_cell.cpp:163-163,advec_cell.cpp:169-171,advec_cell.cpp:174-175,advec_cell.cpp:178-180,advec_cell.cpp:202-202 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 25.00 |
CQA cycles if no scalar integer | 6.17 |
CQA cycles if FP arith vectorized | 25.00 |
CQA cycles if fully vectorized | 17.96 |
Front-end cycles | 25.00 |
DIV/SQRT cycles | 8.80 |
P0 cycles | 8.87 |
P1 cycles | 19.67 |
P2 cycles | 19.67 |
P3 cycles | 15.00 |
P4 cycles | 8.80 |
P5 cycles | 8.80 |
P6 cycles | 15.00 |
P7 cycles | 15.00 |
P8 cycles | 15.00 |
P9 cycles | 8.73 |
P10 cycles | 19.67 |
P11 cycles | 0.00 |
Inter-iter dependencies cycles | NA |
FE+BE cycles (UFS) | 25.45 |
Stall cycles (UFS) | 0.00 |
Nb insns | 147.00 |
Nb uops | 150.00 |
Nb loads | 59.00 |
Nb stores | 30.00 |
Nb stack references | 36.00 |
FLOP/cycle | 0.00 |
Nb FLOP add-sub | 0.00 |
Nb FLOP mul | 0.00 |
Nb FLOP fma | 0.00 |
Nb FLOP div | 0.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 35.36 |
Bytes prefetched | 0.00 |
Bytes loaded | 456.00 |
Bytes stored | 428.00 |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | 13.21 |
Vectorization ratio load | 7.69 |
Vectorization ratio store | 13.33 |
Vectorization ratio mul | NA |
Vectorization ratio add_sub | 100.00 |
Vectorization ratio fma | NA |
Vectorization ratio div_sqrt | NA |
Vectorization ratio other | 10.53 |
Vector-efficiency ratio all | 19.10 |
Vector-efficiency ratio load | 16.83 |
Vector-efficiency ratio store | 22.29 |
Vector-efficiency ratio mul | NA |
Vector-efficiency ratio add_sub | 100.00 |
Vector-efficiency ratio fma | NA |
Vector-efficiency ratio div_sqrt | NA |
Vector-efficiency ratio other | 11.18 |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 4.05 |
CQA speedup if FP arith vectorized | 1.00 |
CQA speedup if fully vectorized | 1.39 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.27 |
Bottlenecks | micro-operation queue, |
Function | advec_cell_kernel(int, int, int, int, int, int, clover::Buffer1D |
Source | stl_algobase.h:238-238,context.h:46-46,context.h:69-69,advec_cell.cpp:159-159,advec_cell.cpp:163-163,advec_cell.cpp:169-171,advec_cell.cpp:174-175,advec_cell.cpp:178-180,advec_cell.cpp:202-202 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 25.00 |
CQA cycles if no scalar integer | 6.17 |
CQA cycles if FP arith vectorized | 25.00 |
CQA cycles if fully vectorized | 17.96 |
Front-end cycles | 25.00 |
DIV/SQRT cycles | 8.80 |
P0 cycles | 8.87 |
P1 cycles | 19.67 |
P2 cycles | 19.67 |
P3 cycles | 15.00 |
P4 cycles | 8.80 |
P5 cycles | 8.80 |
P6 cycles | 15.00 |
P7 cycles | 15.00 |
P8 cycles | 15.00 |
P9 cycles | 8.73 |
P10 cycles | 19.67 |
P11 cycles | 0.00 |
Inter-iter dependencies cycles | NA |
FE+BE cycles (UFS) | 25.45 |
Stall cycles (UFS) | 0.00 |
Nb insns | 147.00 |
Nb uops | 150.00 |
Nb loads | 59.00 |
Nb stores | 30.00 |
Nb stack references | 36.00 |
FLOP/cycle | 0.00 |
Nb FLOP add-sub | 0.00 |
Nb FLOP mul | 0.00 |
Nb FLOP fma | 0.00 |
Nb FLOP div | 0.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 35.36 |
Bytes prefetched | 0.00 |
Bytes loaded | 456.00 |
Bytes stored | 428.00 |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | 13.21 |
Vectorization ratio load | 7.69 |
Vectorization ratio store | 13.33 |
Vectorization ratio mul | NA |
Vectorization ratio add_sub | 100.00 |
Vectorization ratio fma | NA |
Vectorization ratio div_sqrt | NA |
Vectorization ratio other | 10.53 |
Vector-efficiency ratio all | 19.10 |
Vector-efficiency ratio load | 16.83 |
Vector-efficiency ratio store | 22.29 |
Vector-efficiency ratio mul | NA |
Vector-efficiency ratio add_sub | 100.00 |
Vector-efficiency ratio fma | NA |
Vector-efficiency ratio div_sqrt | NA |
Vector-efficiency ratio other | 11.18 |
Path / |
nb instructions | 147 |
nb uops | 150 |
loop length | 801 |
used x86 registers | 15 |
used mmx registers | 0 |
used xmm registers | 3 |
used ymm registers | 0 |
used zmm registers | 12 |
nb stack references | 36 |
micro-operation queue | 25.00 cycles |
front end | 25.00 cycles |
Port | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 8.80 | 8.87 | 19.67 | 19.67 | 15.00 | 8.80 | 8.80 | 15.00 | 15.00 | 15.00 | 8.73 | 19.67 |
cycles | 8.80 | 8.87 | 19.67 | 19.67 | 15.00 | 8.80 | 8.80 | 15.00 | 15.00 | 15.00 | 8.73 | 19.67 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 25.45 |
Stall cycles | 0.00 |
Front-end | 25.00 |
Dispatch | 19.67 |
Overall L1 | 25.00 |
all | 10% |
load | 10% |
store | 13% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 100% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 40% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 50% |
all | 13% |
load | 7% |
store | 13% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 100% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 10% |
all | 19% |
load | 18% |
store | 22% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 100% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 9% |
all | 17% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 18% |
all | 19% |
load | 16% |
store | 22% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 100% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 11% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
CMP %R10D,%R13D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMOVBE %R13D,%R10D | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1-2 | 1 |
MOV 0x2f8(%RSP),%R13D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R10D,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA (%R13,%R10,1),%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
LEA 0x3(%RCX),%R10D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %EAX,0x2e4(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R10D,0x2cc(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %EAX,%R13D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 41b750 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.6.lto_priv.0+0xb50> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x30(%RSP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x38(%RSP),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x28(%RSP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RDI),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RDI),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RSP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R14),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R14),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%R9),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RBX,0x2d8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%RDI),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x2e8(%RSP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RSI,0x2c0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%RAX),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R9,0x1c0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RDI),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x1(%R15),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x8(%RSP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RSI,0x78(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
IMUL %R11,%R8 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%R14,%R11,8),%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
IMUL %R11,%RBX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV 0x20(%RSP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R11,0x2f0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RDI),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RDI),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R13),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R13),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R8,0x68(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
IMUL %R11,%RSI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %RAX,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA 0x1(%RCX),%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOVSXD %ECX,%RCX | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV %EAX,0x2c8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVSXD %EAX,%RDI | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV %RCX,0x2d0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RBX,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RSI,%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RSI,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x48(%RSP),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVSXD 0x2cc(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD $0x2,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOVSXD %ESI,%RAX | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
LEA -0x1(%R12),%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
CMP %ECX,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %EAX,0x2e0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMOVLE %RAX,%RCX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
CMP $0xe,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JBE 41b433 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.6.lto_priv.0+0x833> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOVSXD 0x2fc(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x2c0(%RSP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %RDI,%ZMM29 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %RCX,%ZMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
KXNORB %K1,%K1,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VPBROADCASTD 0x2fc(%RSP),%ZMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
MOV 0x78(%RSP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %R13,%ZMM27 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R9,%ZMM30 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%RBX,%RAX,1),%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD %RAX,%R8 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %R11,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x70(%RSP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPADDD 0x31769(%RIP),%ZMM0,%ZMM7 | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.67 |
VPBROADCASTQ 0x2d0(%RSP),%ZMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
LEA (%RSI,%R8,8),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV $0x10,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA (%R11,%RAX,8),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPBROADCASTQ 0x2d8(%RSP),%ZMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
MOV %R12D,%R11D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA (%RDI,%RBX,8),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SHR $0x4,%R11D | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
VPBROADCASTD %ESI,%ZMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ 0x2f0(%RSP),%ZMM25 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VBROADCASTSD 0x318a9(%RIP),%ZMM28 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VBROADCASTSD 0x318b7(%RIP),%ZMM31 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
MOV 0x1c0(%RSP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVDQA32 %ZMM7,0x280(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
SAL $0x7,%R11 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
VMOVDQA64 %ZMM1,0x100(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VXORPD %XMM26,%XMM26,%XMM26 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVDQA64 %ZMM2,0xc0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
VMOVDQA32 %ZMM3,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R12D,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RBX,0x1c0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
AND $-0x10,%R8D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
ADD %R8D,0x2f8(%RSP) | 2 | 0.20 | 0.20 | 0.33 | 0.33 | 0.50 | 0.20 | 0.20 | 0.50 | 0.50 | 0.50 | 0.20 | 0.33 | 1 | 0.50 |
ADD %R8D,0x2fc(%RSP) | 2 | 0.20 | 0.20 | 0.33 | 0.33 | 0.50 | 0.20 | 0.20 | 0.50 | 0.50 | 0.50 | 0.20 | 0.33 | 1 | 0.50 |
AND $0xf,%R12D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
JE 41b6f0 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.6.lto_priv.0+0xaf0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x2c0(%RSP),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x68(%RSP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VXORPD %XMM7,%XMM7,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x78(%RSP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x60(%RSP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%R12,%RDI,8),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x70(%RSP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x58(%RSP),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%R11,%RSI,8),%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x2cc(%RSP),%R11D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVSXD 0x2fc(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x280(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x2e0(%RSP),%ECX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%R8,%R12,8),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVSD 0x311b5(%RIP),%XMM8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RBX,0x240(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDI,0x300(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %R11D,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMOVG %R11D,%ECX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOVSXD %ECX,%RSI | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV %RSI,0x200(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x1c0(%RSP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JMP 41b69b <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.6.lto_priv.0+0xa9b> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
MOV 0x2e4(%RSP),%R12D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R12D,0x2f8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x2f0(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x2cc(%RSP),%EDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x2c8(%RSP),%ECX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x2e8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %EDX,0x50(%RSP) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JLE 41b6d8 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.6.lto_priv.0+0xad8> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x44(%RSP),%R13D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x2f8(%RSP),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x4c(%RSP),%R9D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x54(%RSP),%R10D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %ESI,%R13D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R9D,0x2fc(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP 41ad20 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.6.lto_priv.0+0x120> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
MOV 0x2e8(%RSP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x1(%RCX),%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %EDX,0x2c8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
INC %R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R11,0x2f0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP 41b700 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.6.lto_priv.0+0xb00> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
nb instructions | 147 |
nb uops | 150 |
loop length | 801 |
used x86 registers | 15 |
used mmx registers | 0 |
used xmm registers | 3 |
used ymm registers | 0 |
used zmm registers | 12 |
nb stack references | 36 |
micro-operation queue | 25.00 cycles |
front end | 25.00 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 8.80 | 8.87 | 19.67 | 19.67 | 15.00 | 8.80 | 8.80 | 15.00 | 15.00 | 15.00 | 8.73 | 19.67 |
cycles | 8.80 | 8.87 | 19.67 | 19.67 | 15.00 | 8.80 | 8.80 | 15.00 | 15.00 | 15.00 | 8.73 | 19.67 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 25.45 |
Stall cycles | 0.00 |
Front-end | 25.00 |
Dispatch | 19.67 |
Overall L1 | 25.00 |
all | 10% |
load | 10% |
store | 13% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 100% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 40% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 50% |
all | 13% |
load | 7% |
store | 13% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 100% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 10% |
all | 19% |
load | 18% |
store | 22% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 100% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 9% |
all | 17% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 18% |
all | 19% |
load | 16% |
store | 22% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 100% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 11% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
CMP %R10D,%R13D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMOVBE %R13D,%R10D | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1-2 | 1 |
MOV 0x2f8(%RSP),%R13D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R10D,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA (%R13,%R10,1),%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
LEA 0x3(%RCX),%R10D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %EAX,0x2e4(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R10D,0x2cc(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %EAX,%R13D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JAE 41b750 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.6.lto_priv.0+0xb50> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x30(%RSP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x38(%RSP),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x28(%RSP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RDI),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RDI),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RSP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R14),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R14),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%R9),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RBX,0x2d8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%RDI),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x2e8(%RSP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RSI,0x2c0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%RAX),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R9,0x1c0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RDI),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x1(%R15),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x8(%RSP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RSI,0x78(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
IMUL %R11,%R8 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%R14,%R11,8),%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
IMUL %R11,%RBX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV 0x20(%RSP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R11,0x2f0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RDI),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RDI),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R13),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R13),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R8,0x68(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
IMUL %R11,%RSI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %RAX,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA 0x1(%RCX),%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOVSXD %ECX,%RCX | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV %EAX,0x2c8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVSXD %EAX,%RDI | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV %RCX,0x2d0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RBX,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RSI,%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RSI,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x48(%RSP),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVSXD 0x2cc(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD $0x2,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOVSXD %ESI,%RAX | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
LEA -0x1(%R12),%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
CMP %ECX,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %EAX,0x2e0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMOVLE %RAX,%RCX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
CMP $0xe,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JBE 41b433 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.6.lto_priv.0+0x833> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOVSXD 0x2fc(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x2c0(%RSP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %RDI,%ZMM29 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %RCX,%ZMM22 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
KXNORB %K1,%K1,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VPBROADCASTD 0x2fc(%RSP),%ZMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
MOV 0x78(%RSP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTQ %R13,%ZMM27 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R9,%ZMM30 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%RBX,%RAX,1),%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD %RAX,%R8 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %R11,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x70(%RSP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPADDD 0x31769(%RIP),%ZMM0,%ZMM7 | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.67 |
VPBROADCASTQ 0x2d0(%RSP),%ZMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
LEA (%RSI,%R8,8),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV $0x10,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA (%R11,%RAX,8),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPBROADCASTQ 0x2d8(%RSP),%ZMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
MOV %R12D,%R11D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA (%RDI,%RBX,8),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SHR $0x4,%R11D | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
VPBROADCASTD %ESI,%ZMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ 0x2f0(%RSP),%ZMM25 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VBROADCASTSD 0x318a9(%RIP),%ZMM28 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VBROADCASTSD 0x318b7(%RIP),%ZMM31 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
MOV 0x1c0(%RSP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVDQA32 %ZMM7,0x280(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
SAL $0x7,%R11 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
VMOVDQA64 %ZMM1,0x100(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VXORPD %XMM26,%XMM26,%XMM26 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVDQA64 %ZMM2,0xc0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
VMOVDQA32 %ZMM3,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 1 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R12D,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RBX,0x1c0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
AND $-0x10,%R8D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
ADD %R8D,0x2f8(%RSP) | 2 | 0.20 | 0.20 | 0.33 | 0.33 | 0.50 | 0.20 | 0.20 | 0.50 | 0.50 | 0.50 | 0.20 | 0.33 | 1 | 0.50 |
ADD %R8D,0x2fc(%RSP) | 2 | 0.20 | 0.20 | 0.33 | 0.33 | 0.50 | 0.20 | 0.20 | 0.50 | 0.50 | 0.50 | 0.20 | 0.33 | 1 | 0.50 |
AND $0xf,%R12D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
JE 41b6f0 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.6.lto_priv.0+0xaf0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x2c0(%RSP),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x68(%RSP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VXORPD %XMM7,%XMM7,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x78(%RSP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x60(%RSP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%R12,%RDI,8),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x70(%RSP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x58(%RSP),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%R11,%RSI,8),%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x2cc(%RSP),%R11D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVSXD 0x2fc(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x280(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x2e0(%RSP),%ECX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%R8,%R12,8),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVSD 0x311b5(%RIP),%XMM8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RBX,0x240(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDI,0x300(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %R11D,%ECX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMOVG %R11D,%ECX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOVSXD %ECX,%RSI | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV %RSI,0x200(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x1c0(%RSP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JMP 41b69b <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.6.lto_priv.0+0xa9b> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
MOV 0x2e4(%RSP),%R12D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R12D,0x2f8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x2f0(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x2cc(%RSP),%EDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x2c8(%RSP),%ECX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,0x2e8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %EDX,0x50(%RSP) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JLE 41b6d8 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.6.lto_priv.0+0xad8> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x44(%RSP),%R13D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x2f8(%RSP),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x4c(%RSP),%R9D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x54(%RSP),%R10D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %ESI,%R13D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R9D,0x2fc(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP 41ad20 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.6.lto_priv.0+0x120> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
MOV 0x2e8(%RSP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x1(%RCX),%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %EDX,0x2c8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
INC %R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R11,0x2f0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP 41b700 <_Z17advec_cell_kerneliiiiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_._omp_fn.6.lto_priv.0+0xb00> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |