Loop Id: 1197 | Module: exec | Source: par_lr_interp.c:1221-1675 [...] | Coverage: 0.15% |
---|
Loop Id: 1197 | Module: exec | Source: par_lr_interp.c:1221-1675 [...] | Coverage: 0.15% |
---|
0x45b220 MOV -0xb8(%RBP),%RDX |
0x45b227 MOV (%RDX),%RDX |
0x45b22a VMOVSD (%RDX,%RCX,8),%XMM5 |
0x45b22f MOV -0x68(%RBP),%RSI |
0x45b233 VADDSD (%RSI,%R13,8),%XMM5,%XMM5 |
0x45b239 VMOVSD %XMM5,(%RDX,%RCX,8) |
0x45b23e MOV -0x30(%RBP),%RDX |
0x45b242 MOV -0x38(%RBP),%R12 |
0x45b246 INC %R13 |
0x45b249 CMP %RBX,%R13 |
0x45b24c JE 45ba24 |
0x45b252 MOV (%RAX,%R13,8),%RDI |
0x45b256 MOV (%R14,%RDI,8),%RCX |
0x45b25a CMP %R11,%RCX |
0x45b25d JGE 45b220 |
0x45b25f CMP %R9,%RCX |
0x45b262 JNE 45b340 |
0x45b268 MOV -0x190(%RBP),%RDX |
0x45b26f MOV (%RDX,%RDI,8),%R12 |
0x45b273 VPXOR %XMM5,%XMM5,%XMM5 |
0x45b277 XOR %ECX,%ECX |
0x45b279 MOV -0x68(%RBP),%RSI |
0x45b27d VUCOMISD (%RSI,%R12,8),%XMM5 |
0x45b283 MOV %RDI,-0xa8(%RBP) |
0x45b28a MOV 0x8(%RDX,%RDI,8),%R9 |
0x45b28f MOV -0x30(%RBP),%RDX |
0x45b293 SETBE %CL |
0x45b296 ADD %RCX,%RCX |
0x45b299 DEC %RCX |
0x45b29c MOV %RCX,-0x78(%RBP) |
0x45b2a0 LEA 0x1(%R12),%R8 |
0x45b2a5 CMP %R9,%R8 |
0x45b2a8 MOV -0xd8(%RBP),%RDI |
0x45b2af JGE 45b3a0 |
0x45b2b5 VCVTSI2SDQ -0x78(%RBP),%XMM9,%XMM6 |
0x45b2bb MOV %R12,%R10 |
0x45b2be NOT %R10 |
0x45b2c1 ADD %R9,%R10 |
0x45b2c4 VXORPD %XMM5,%XMM5,%XMM5 |
0x45b2c8 CMP $0x4,%R10 |
0x45b2cc JAE 45b640 |
0x45b2d2 MOV %R10,%RCX |
0x45b2d5 AND $-0x4,%RCX |
0x45b2d9 CMP %R10,%RCX |
0x45b2dc JAE 45b3a0 |
0x45b2e2 ADD %R12,%RCX |
0x45b2e5 INC %RCX |
0x45b2e8 JMP 45b326 |
(1203) 0x45b300 MOV -0x68(%RBP),%RSI |
(1203) 0x45b304 VMOVSD (%RSI,%RCX,8),%XMM1 |
(1203) 0x45b309 VMULSD %XMM6,%XMM1,%XMM7 |
(1203) 0x45b30d VADDSD %XMM5,%XMM1,%XMM1 |
(1203) 0x45b311 VCMPSD $0x1,%XMM0,%XMM7,%K1 |
(1203) 0x45b318 VMOVSD %XMM1,%XMM5,%XMM5{%K1} |
(1203) 0x45b31e INC %RCX |
(1203) 0x45b321 CMP %RCX,%R9 |
(1203) 0x45b324 JE 45b3a0 |
(1203) 0x45b326 MOV (%RAX,%RCX,8),%RSI |
(1203) 0x45b32a CMP %R11,(%R14,%RSI,8) |
(1203) 0x45b32e JGE 45b300 |
(1203) 0x45b330 CMP %RDX,%RSI |
(1203) 0x45b333 JNE 45b31e |
(1203) 0x45b335 JMP 45b300 |
0x45b340 CMPQ $-0x3,(%R8,%RDI,8) |
0x45b345 JE 45b242 |
0x45b34b CMPQ $0x1,-0x188(%RBP) |
0x45b353 JE 45b372 |
0x45b355 MOV -0x180(%RBP),%RDX |
0x45b35c MOV -0x30(%RBP),%RCX |
0x45b360 MOV (%RDX,%RCX,8),%RCX |
0x45b364 CMP (%RDX,%RDI,8),%RCX |
0x45b368 MOV -0x30(%RBP),%RDX |
0x45b36c JNE 45b242 |
0x45b372 MOV -0x68(%RBP),%RCX |
0x45b376 VADDSD (%RCX,%R13,8),%XMM4,%XMM4 |
0x45b37c JMP 45b242 |
0x45b3a0 MOV -0xd0(%RBP),%RCX |
0x45b3a7 MOV (%RCX),%RCX |
0x45b3aa MOV %RCX,-0xc8(%RBP) |
0x45b3b1 CMP $0x2,%RCX |
0x45b3b5 JL 45b460 |
0x45b3bb MOV -0x170(%RBP),%RCX |
0x45b3c2 MOV -0xa8(%RBP),%RSI |
0x45b3c9 MOV (%RCX,%RSI,8),%R10 |
0x45b3cd MOV 0x8(%RCX,%RSI,8),%RCX |
0x45b3d2 MOV %RCX,%RBX |
0x45b3d5 SUB %R10,%RBX |
0x45b3d8 JLE 45b460 |
0x45b3de VCVTSI2SDQ -0x78(%RBP),%XMM9,%XMM6 |
0x45b3e4 CMP $0x4,%RBX |
0x45b3e8 MOV %RCX,-0x140(%RBP) |
0x45b3ef MOV %RBX,-0x118(%RBP) |
0x45b3f6 JAE 45b760 |
0x45b3fc MOV -0x118(%RBP),%RSI |
0x45b403 MOV %RSI,%RCX |
0x45b406 AND $-0x4,%RCX |
0x45b40a CMP %RSI,%RCX |
0x45b40d MOV -0x140(%RBP),%RSI |
0x45b414 JAE 45b460 |
0x45b416 ADD %RCX,%R10 |
0x45b419 JMP 45b42c |
(1201) 0x45b420 MOV -0x30(%RBP),%RDX |
(1201) 0x45b424 INC %R10 |
(1201) 0x45b427 CMP %R10,%RSI |
(1201) 0x45b42a JE 45b460 |
(1201) 0x45b42c MOV (%RDI,%R10,8),%RCX |
(1201) 0x45b430 MOV -0x38(%RBP),%RDX |
(1201) 0x45b434 CMP %R15,(%RDX,%RCX,8) |
(1201) 0x45b438 JL 45b420 |
(1201) 0x45b43a MOV -0x48(%RBP),%RCX |
(1201) 0x45b43e VMOVSD (%RCX,%R10,8),%XMM1 |
(1201) 0x45b444 VMULSD %XMM6,%XMM1,%XMM7 |
(1201) 0x45b448 VADDSD %XMM5,%XMM1,%XMM1 |
(1201) 0x45b44c VCMPSD $0x1,%XMM0,%XMM7,%K1 |
(1201) 0x45b453 VMOVSD %XMM1,%XMM5,%XMM5{%K1} |
(1201) 0x45b459 JMP 45b420 |
0x45b460 VUCOMISD %XMM0,%XMM5 |
0x45b464 MOV -0x68(%RBP),%RCX |
0x45b468 VMOVSD (%RCX,%R13,8),%XMM6 |
0x45b46e JE 45b520 |
0x45b474 VDIVSD %XMM5,%XMM6,%XMM5 |
0x45b478 CMP %R9,%R8 |
0x45b47b JGE 45b540 |
0x45b481 VCVTSI2SDQ -0x78(%RBP),%XMM9,%XMM6 |
0x45b487 MOV -0xf8(%RBP),%R10 |
0x45b48e JMP 45b4ac |
(1200) 0x45b4a0 INC %R8 |
(1200) 0x45b4a3 CMP %R8,%R9 |
(1200) 0x45b4a6 JE 45b547 |
(1200) 0x45b4ac MOV (%RAX,%R8,8),%RCX |
(1200) 0x45b4b0 MOV (%R14,%RCX,8),%RSI |
(1200) 0x45b4b4 CMP %R11,%RSI |
(1200) 0x45b4b7 JL 45b4e9 |
(1200) 0x45b4b9 MOV -0x68(%RBP),%RBX |
(1200) 0x45b4bd VMOVSD (%RBX,%R8,8),%XMM7 |
(1200) 0x45b4c3 VMULSD %XMM6,%XMM7,%XMM1 |
(1200) 0x45b4c7 VUCOMISD %XMM0,%XMM1 |
(1200) 0x45b4cb JAE 45b4e9 |
(1200) 0x45b4cd MOV -0xb8(%RBP),%RDI |
(1200) 0x45b4d4 MOV (%RDI),%RBX |
(1200) 0x45b4d7 MOV -0xd8(%RBP),%RDI |
(1200) 0x45b4de VFMADD213SD (%RBX,%RSI,8),%XMM5,%XMM7 |
(1200) 0x45b4e4 VMOVSD %XMM7,(%RBX,%RSI,8) |
(1200) 0x45b4e9 CMP %RDX,%RCX |
(1200) 0x45b4ec JNE 45b4a0 |
(1200) 0x45b4ee MOV -0x68(%RBP),%RCX |
(1200) 0x45b4f2 VMOVSD (%RCX,%R8,8),%XMM1 |
(1200) 0x45b4f8 VMULSD %XMM6,%XMM1,%XMM7 |
(1200) 0x45b4fc VFMADD213SD %XMM4,%XMM5,%XMM1 |
(1200) 0x45b501 VCMPSD $0x1,%XMM0,%XMM7,%K1 |
(1200) 0x45b508 VMOVSD %XMM1,%XMM4,%XMM4{%K1} |
(1200) 0x45b50e JMP 45b4a0 |
0x45b520 VADDSD %XMM4,%XMM6,%XMM4 |
0x45b524 MOV -0x40(%RBP),%R8 |
0x45b528 MOV -0xf8(%RBP),%R10 |
0x45b52f JMP 45b624 |
0x45b540 MOV -0xf8(%RBP),%R10 |
0x45b547 CMPQ $0x2,-0xc8(%RBP) |
0x45b54f JL 45b620 |
0x45b555 MOV -0x170(%RBP),%RCX |
0x45b55c MOV -0xa8(%RBP),%RSI |
0x45b563 MOV (%RCX,%RSI,8),%RDX |
0x45b567 MOV 0x8(%RCX,%RSI,8),%R9 |
0x45b56c MOV %R9,%R8 |
0x45b56f SUB %RDX,%R8 |
0x45b572 JLE 45b740 |
0x45b578 VCVTSI2SDQ -0x78(%RBP),%XMM9,%XMM6 |
0x45b57e CMP $0x4,%R8 |
0x45b582 MOV -0x38(%RBP),%R12 |
0x45b586 JAE 45b8a0 |
0x45b58c MOV %R8,%RCX |
0x45b58f AND $-0x4,%RCX |
0x45b593 CMP %R8,%RCX |
0x45b596 JAE 45b9e0 |
0x45b59c ADD %RCX,%RDX |
0x45b59f MOV -0x40(%RBP),%R8 |
0x45b5a3 MOV -0xa0(%RBP),%RBX |
0x45b5aa JMP 45b5cc |
(1198) 0x45b5c0 INC %RDX |
(1198) 0x45b5c3 CMP %RDX,%R9 |
(1198) 0x45b5c6 JE 45ba00 |
(1198) 0x45b5cc MOV -0xd8(%RBP),%RCX |
(1198) 0x45b5d3 MOV (%RCX,%RDX,8),%RCX |
(1198) 0x45b5d7 MOV (%R12,%RCX,8),%RCX |
(1198) 0x45b5db CMP %R15,%RCX |
(1198) 0x45b5de JL 45b5c0 |
(1198) 0x45b5e0 MOV -0x48(%RBP),%RAX |
(1198) 0x45b5e4 VMOVSD (%RAX,%RDX,8),%XMM7 |
(1198) 0x45b5e9 MOV -0xe8(%RBP),%RAX |
(1198) 0x45b5f0 VMULSD %XMM6,%XMM7,%XMM1 |
(1198) 0x45b5f4 VUCOMISD %XMM0,%XMM1 |
(1198) 0x45b5f8 JAE 45b5c0 |
(1198) 0x45b5fa MOV -0x58(%RBP),%RSI |
(1198) 0x45b5fe MOV (%RSI),%RSI |
(1198) 0x45b601 VFMADD213SD (%RSI,%RCX,8),%XMM5,%XMM7 |
(1198) 0x45b607 VMOVSD %XMM7,(%RSI,%RCX,8) |
(1198) 0x45b60c JMP 45b5c0 |
0x45b620 MOV -0x40(%RBP),%R8 |
0x45b624 MOV -0x38(%RBP),%R12 |
0x45b628 MOV -0x70(%RBP),%R9 |
0x45b62c MOV -0xa0(%RBP),%RBX |
0x45b633 JMP 45b246 |
0x45b640 MOV %R10,%RCX |
0x45b643 SHR $0x2,%RCX |
0x45b647 LEA 0x20(,%R12,8),%RBX |
0x45b64f VXORPD %XMM5,%XMM5,%XMM5 |
0x45b653 JMP 45b68b |
(1204) 0x45b660 MOV -0x68(%RBP),%RSI |
(1204) 0x45b664 VMOVSD (%RSI,%RBX,1),%XMM1 |
(1204) 0x45b669 VMULSD %XMM6,%XMM1,%XMM7 |
(1204) 0x45b66d VADDSD %XMM5,%XMM1,%XMM1 |
(1204) 0x45b671 VCMPSD $0x1,%XMM0,%XMM7,%K1 |
(1204) 0x45b678 VMOVSD %XMM1,%XMM5,%XMM5{%K1} |
(1204) 0x45b67e ADD $0x20,%RBX |
(1204) 0x45b682 DEC %RCX |
(1204) 0x45b685 JE 45b2d2 |
(1204) 0x45b68b MOV -0x18(%RAX,%RBX,1),%RSI |
(1204) 0x45b690 CMP %R11,(%R14,%RSI,8) |
(1204) 0x45b694 JGE 45b69b |
(1204) 0x45b696 CMP %RDX,%RSI |
(1204) 0x45b699 JNE 45b6ba |
(1204) 0x45b69b MOV -0x68(%RBP),%RSI |
(1204) 0x45b69f VMOVSD -0x18(%RSI,%RBX,1),%XMM1 |
(1204) 0x45b6a5 VMULSD %XMM6,%XMM1,%XMM7 |
(1204) 0x45b6a9 VADDSD %XMM5,%XMM1,%XMM1 |
(1204) 0x45b6ad VCMPSD $0x1,%XMM0,%XMM7,%K1 |
(1204) 0x45b6b4 VMOVSD %XMM1,%XMM5,%XMM5{%K1} |
(1204) 0x45b6ba MOV -0x10(%RAX,%RBX,1),%RSI |
(1204) 0x45b6bf CMP %R11,(%R14,%RSI,8) |
(1204) 0x45b6c3 JGE 45b6ca |
(1204) 0x45b6c5 CMP %RDX,%RSI |
(1204) 0x45b6c8 JNE 45b6e9 |
(1204) 0x45b6ca MOV -0x68(%RBP),%RSI |
(1204) 0x45b6ce VMOVSD -0x10(%RSI,%RBX,1),%XMM1 |
(1204) 0x45b6d4 VMULSD %XMM6,%XMM1,%XMM7 |
(1204) 0x45b6d8 VADDSD %XMM5,%XMM1,%XMM1 |
(1204) 0x45b6dc VCMPSD $0x1,%XMM0,%XMM7,%K1 |
(1204) 0x45b6e3 VMOVSD %XMM1,%XMM5,%XMM5{%K1} |
(1204) 0x45b6e9 MOV -0x8(%RAX,%RBX,1),%RSI |
(1204) 0x45b6ee CMP %R11,(%R14,%RSI,8) |
(1204) 0x45b6f2 JGE 45b6f9 |
(1204) 0x45b6f4 CMP %RDX,%RSI |
(1204) 0x45b6f7 JNE 45b718 |
(1204) 0x45b6f9 MOV -0x68(%RBP),%RSI |
(1204) 0x45b6fd VMOVSD -0x8(%RSI,%RBX,1),%XMM1 |
(1204) 0x45b703 VMULSD %XMM6,%XMM1,%XMM7 |
(1204) 0x45b707 VADDSD %XMM5,%XMM1,%XMM1 |
(1204) 0x45b70b VCMPSD $0x1,%XMM0,%XMM7,%K1 |
(1204) 0x45b712 VMOVSD %XMM1,%XMM5,%XMM5{%K1} |
(1204) 0x45b718 MOV (%RAX,%RBX,1),%RSI |
(1204) 0x45b71c CMP %R11,(%R14,%RSI,8) |
(1204) 0x45b720 JGE 45b660 |
(1204) 0x45b726 CMP %RDX,%RSI |
(1204) 0x45b729 JNE 45b67e |
(1204) 0x45b72f JMP 45b660 |
0x45b740 MOV -0x40(%RBP),%R8 |
0x45b744 MOV -0x38(%RBP),%R12 |
0x45b748 MOV -0x30(%RBP),%RDX |
0x45b74c JMP 45b628 |
0x45b760 SHR $0x2,%RBX |
0x45b764 LEA 0x18(,%R10,8),%RCX |
0x45b76c MOV -0x38(%RBP),%R12 |
0x45b770 JMP 45b791 |
(1202) 0x45b780 MOV -0x30(%RBP),%RDX |
(1202) 0x45b784 ADD $0x20,%RCX |
(1202) 0x45b788 DEC %RBX |
(1202) 0x45b78b JE 45b3fc |
(1202) 0x45b791 MOV -0x18(%RDI,%RCX,1),%RSI |
(1202) 0x45b796 CMP %R15,(%R12,%RSI,8) |
(1202) 0x45b79a JGE 45b7e0 |
(1202) 0x45b79c MOV -0x10(%RDI,%RCX,1),%RSI |
(1202) 0x45b7a1 MOV -0x38(%RBP),%RDX |
(1202) 0x45b7a5 CMP %R15,(%RDX,%RSI,8) |
(1202) 0x45b7a9 JGE 45b80e |
(1202) 0x45b7ab MOV -0x8(%RDI,%RCX,1),%RSI |
(1202) 0x45b7b0 MOV -0x38(%RBP),%RDX |
(1202) 0x45b7b4 CMP %R15,(%RDX,%RSI,8) |
(1202) 0x45b7b8 JGE 45b840 |
(1202) 0x45b7be MOV (%RDI,%RCX,1),%RSI |
(1202) 0x45b7c2 MOV -0x38(%RBP),%RDX |
(1202) 0x45b7c6 CMP %R15,(%RDX,%RSI,8) |
(1202) 0x45b7ca JL 45b780 |
(1202) 0x45b7cc JMP 45b871 |
(1202) 0x45b7e0 MOV -0x48(%RBP),%RSI |
(1202) 0x45b7e4 VMOVSD -0x18(%RSI,%RCX,1),%XMM1 |
(1202) 0x45b7ea VMULSD %XMM6,%XMM1,%XMM7 |
(1202) 0x45b7ee VADDSD %XMM5,%XMM1,%XMM1 |
(1202) 0x45b7f2 VCMPSD $0x1,%XMM0,%XMM7,%K1 |
(1202) 0x45b7f9 VMOVSD %XMM1,%XMM5,%XMM5{%K1} |
(1202) 0x45b7ff MOV -0x10(%RDI,%RCX,1),%RSI |
(1202) 0x45b804 MOV -0x38(%RBP),%RDX |
(1202) 0x45b808 CMP %R15,(%RDX,%RSI,8) |
(1202) 0x45b80c JL 45b7ab |
(1202) 0x45b80e MOV -0x48(%RBP),%RSI |
(1202) 0x45b812 VMOVSD -0x10(%RSI,%RCX,1),%XMM1 |
(1202) 0x45b818 VMULSD %XMM6,%XMM1,%XMM7 |
(1202) 0x45b81c VADDSD %XMM5,%XMM1,%XMM1 |
(1202) 0x45b820 VCMPSD $0x1,%XMM0,%XMM7,%K1 |
(1202) 0x45b827 VMOVSD %XMM1,%XMM5,%XMM5{%K1} |
(1202) 0x45b82d MOV -0x8(%RDI,%RCX,1),%RSI |
(1202) 0x45b832 MOV -0x38(%RBP),%RDX |
(1202) 0x45b836 CMP %R15,(%RDX,%RSI,8) |
(1202) 0x45b83a JL 45b7be |
(1202) 0x45b840 MOV -0x48(%RBP),%RSI |
(1202) 0x45b844 VMOVSD -0x8(%RSI,%RCX,1),%XMM1 |
(1202) 0x45b84a VMULSD %XMM6,%XMM1,%XMM7 |
(1202) 0x45b84e VADDSD %XMM5,%XMM1,%XMM1 |
(1202) 0x45b852 VCMPSD $0x1,%XMM0,%XMM7,%K1 |
(1202) 0x45b859 VMOVSD %XMM1,%XMM5,%XMM5{%K1} |
(1202) 0x45b85f MOV (%RDI,%RCX,1),%RSI |
(1202) 0x45b863 MOV -0x38(%RBP),%RDX |
(1202) 0x45b867 CMP %R15,(%RDX,%RSI,8) |
(1202) 0x45b86b JL 45b780 |
(1202) 0x45b871 MOV -0x48(%RBP),%RSI |
(1202) 0x45b875 VMOVSD (%RSI,%RCX,1),%XMM1 |
(1202) 0x45b87a VMULSD %XMM6,%XMM1,%XMM7 |
(1202) 0x45b87e VADDSD %XMM5,%XMM1,%XMM1 |
(1202) 0x45b882 VCMPSD $0x1,%XMM0,%XMM7,%K1 |
(1202) 0x45b889 VMOVSD %XMM1,%XMM5,%XMM5{%K1} |
(1202) 0x45b88f JMP 45b780 |
0x45b8a0 MOV %R8,%RCX |
0x45b8a3 SHR $0x2,%RCX |
0x45b8a7 LEA 0x18(,%RDX,8),%RBX |
0x45b8af JMP 45b8cd |
(1199) 0x45b8c0 ADD $0x20,%RBX |
(1199) 0x45b8c4 DEC %RCX |
(1199) 0x45b8c7 JE 45b58c |
(1199) 0x45b8cd MOV -0x18(%RDI,%RBX,1),%RSI |
(1199) 0x45b8d2 MOV (%R12,%RSI,8),%RSI |
(1199) 0x45b8d6 CMP %R15,%RSI |
(1199) 0x45b8d9 JL 45b90f |
(1199) 0x45b8db MOV -0x48(%RBP),%RAX |
(1199) 0x45b8df VMOVSD -0x18(%RAX,%RBX,1),%XMM7 |
(1199) 0x45b8e5 MOV -0xe8(%RBP),%RAX |
(1199) 0x45b8ec VMULSD %XMM6,%XMM7,%XMM1 |
(1199) 0x45b8f0 VUCOMISD %XMM0,%XMM1 |
(1199) 0x45b8f4 JAE 45b90f |
(1199) 0x45b8f6 MOV -0x58(%RBP),%RDI |
(1199) 0x45b8fa MOV (%RDI),%RDI |
(1199) 0x45b8fd VFMADD213SD (%RDI,%RSI,8),%XMM5,%XMM7 |
(1199) 0x45b903 VMOVSD %XMM7,(%RDI,%RSI,8) |
(1199) 0x45b908 MOV -0xd8(%RBP),%RDI |
(1199) 0x45b90f MOV -0x10(%RDI,%RBX,1),%RSI |
(1199) 0x45b914 MOV (%R12,%RSI,8),%RSI |
(1199) 0x45b918 CMP %R15,%RSI |
(1199) 0x45b91b JL 45b951 |
(1199) 0x45b91d MOV -0x48(%RBP),%RAX |
(1199) 0x45b921 VMOVSD -0x10(%RAX,%RBX,1),%XMM7 |
(1199) 0x45b927 MOV -0xe8(%RBP),%RAX |
(1199) 0x45b92e VMULSD %XMM6,%XMM7,%XMM1 |
(1199) 0x45b932 VUCOMISD %XMM0,%XMM1 |
(1199) 0x45b936 JAE 45b951 |
(1199) 0x45b938 MOV -0x58(%RBP),%RDI |
(1199) 0x45b93c MOV (%RDI),%RDI |
(1199) 0x45b93f VFMADD213SD (%RDI,%RSI,8),%XMM5,%XMM7 |
(1199) 0x45b945 VMOVSD %XMM7,(%RDI,%RSI,8) |
(1199) 0x45b94a MOV -0xd8(%RBP),%RDI |
(1199) 0x45b951 MOV -0x8(%RDI,%RBX,1),%RSI |
(1199) 0x45b956 MOV (%R12,%RSI,8),%RSI |
(1199) 0x45b95a CMP %R15,%RSI |
(1199) 0x45b95d JL 45b993 |
(1199) 0x45b95f MOV -0x48(%RBP),%RAX |
(1199) 0x45b963 VMOVSD -0x8(%RAX,%RBX,1),%XMM7 |
(1199) 0x45b969 MOV -0xe8(%RBP),%RAX |
(1199) 0x45b970 VMULSD %XMM6,%XMM7,%XMM1 |
(1199) 0x45b974 VUCOMISD %XMM0,%XMM1 |
(1199) 0x45b978 JAE 45b993 |
(1199) 0x45b97a MOV -0x58(%RBP),%RDI |
(1199) 0x45b97e MOV (%RDI),%RDI |
(1199) 0x45b981 VFMADD213SD (%RDI,%RSI,8),%XMM5,%XMM7 |
(1199) 0x45b987 VMOVSD %XMM7,(%RDI,%RSI,8) |
(1199) 0x45b98c MOV -0xd8(%RBP),%RDI |
(1199) 0x45b993 MOV (%RDI,%RBX,1),%RSI |
(1199) 0x45b997 MOV (%R12,%RSI,8),%RSI |
(1199) 0x45b99b CMP %R15,%RSI |
(1199) 0x45b99e JL 45b8c0 |
(1199) 0x45b9a4 MOV -0x48(%RBP),%RAX |
(1199) 0x45b9a8 VMOVSD (%RAX,%RBX,1),%XMM7 |
(1199) 0x45b9ad MOV -0xe8(%RBP),%RAX |
(1199) 0x45b9b4 VMULSD %XMM6,%XMM7,%XMM1 |
(1199) 0x45b9b8 VUCOMISD %XMM0,%XMM1 |
(1199) 0x45b9bc JAE 45b8c0 |
(1199) 0x45b9c2 MOV -0x58(%RBP),%RDI |
(1199) 0x45b9c6 MOV (%RDI),%RDI |
(1199) 0x45b9c9 VFMADD213SD (%RDI,%RSI,8),%XMM5,%XMM7 |
(1199) 0x45b9cf VMOVSD %XMM7,(%RDI,%RSI,8) |
(1199) 0x45b9d4 MOV -0xd8(%RBP),%RDI |
(1199) 0x45b9db JMP 45b8c0 |
0x45b9e0 MOV -0x40(%RBP),%R8 |
0x45b9e4 MOV -0x30(%RBP),%RDX |
0x45b9e8 JMP 45b628 |
0x45ba00 MOV -0x30(%RBP),%RDX |
0x45ba04 MOV -0x70(%RBP),%R9 |
0x45ba08 JMP 45b246 |
/home/kcamus/qaas_runs/169-443-9681/intel/AMG/build/AMG/AMG/parcsr_ls/par_lr_interp.c: 1221 - 1675 |
-------------------------------------------------------------------------------- |
1221: if (n_fine) |
[...] |
1609: for (jj = A_diag_i[i]+1; jj < A_diag_i[i+1]; jj++) |
1610: { /* i1 is a c-point and strongly influences i, accumulate |
1611: * a_(i,i1) into interpolation weight */ |
1612: i1 = A_diag_j[jj]; |
1613: if (P_marker[i1] >= jj_begin_row) |
1614: { |
1615: P_diag_data[P_marker[i1]] += A_diag_data[jj]; |
1616: } |
1617: else if(P_marker[i1] == strong_f_marker) |
1618: { |
1619: sum = zero; |
1620: sgn = 1; |
1621: if(A_diag_data[A_diag_i[i1]] < 0) sgn = -1; |
1622: /* Loop over row of A for point i1 and calculate the sum |
1623: * of the connections to c-points that strongly influence i. */ |
1624: for(jj1 = A_diag_i[i1]+1; jj1 < A_diag_i[i1+1]; jj1++) |
1625: { |
1626: i2 = A_diag_j[jj1]; |
1627: if((P_marker[i2] >= jj_begin_row || i2 == i) && (sgn*A_diag_data[jj1]) < 0) |
1628: sum += A_diag_data[jj1]; |
1629: } |
1630: if(num_procs > 1) |
1631: { |
1632: for(jj1 = A_offd_i[i1]; jj1< A_offd_i[i1+1]; jj1++) |
1633: { |
1634: i2 = A_offd_j[jj1]; |
1635: if(P_marker_offd[i2] >= jj_begin_row_offd && |
1636: (sgn*A_offd_data[jj1]) < 0) |
1637: sum += A_offd_data[jj1]; |
1638: } |
1639: } |
1640: if(sum != 0) |
1641: { |
1642: distribute = A_diag_data[jj]/sum; |
1643: /* Loop over row of A for point i1 and do the distribution */ |
1644: for(jj1 = A_diag_i[i1]+1; jj1 < A_diag_i[i1+1]; jj1++) |
1645: { |
1646: i2 = A_diag_j[jj1]; |
1647: if(P_marker[i2] >= jj_begin_row && (sgn*A_diag_data[jj1]) < 0) |
1648: P_diag_data[P_marker[i2]] += |
1649: distribute*A_diag_data[jj1]; |
1650: if(i2 == i && (sgn*A_diag_data[jj1]) < 0) |
1651: diagonal += distribute*A_diag_data[jj1]; |
1652: } |
1653: if(num_procs > 1) |
1654: { |
1655: for(jj1 = A_offd_i[i1]; jj1 < A_offd_i[i1+1]; jj1++) |
1656: { |
1657: i2 = A_offd_j[jj1]; |
1658: if(P_marker_offd[i2] >= jj_begin_row_offd && |
1659: (sgn*A_offd_data[jj1]) < 0) |
1660: P_offd_data[P_marker_offd[i2]] += |
[...] |
1667: diagonal += A_diag_data[jj]; |
1668: } |
1669: } |
1670: /* neighbor i1 weakly influences i, accumulate a_(i,i1) into |
1671: * diagonal */ |
1672: else if (CF_marker[i1] != -3) |
1673: { |
1674: if(num_functions == 1 || dof_func[i] == dof_func[i1]) |
1675: diagonal += A_diag_data[jj]; |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►100.00+ | __kmp_invoke_microtask | libiomp5.so | |
○ | __kmp_fork_call | libiomp5.so | |
○ | __kmpc_fork_call | libiomp5.so | |
○ | hypre_BoomerAMGBuildExtPIInter[...] | par_lr_interp.c:1196 | exec |
○ | hypre_BoomerAMGSetup | par_amg_setup.c:847 | exec |
○ | hypre_PCGSetup | pcg.c:234 | exec |
○ | main | amg.c:398 | exec |
○ | __libc_init_first | libc.so.6 |
Path / |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 2.07 |
CQA speedup if FP arith vectorized | 2.84 |
CQA speedup if fully vectorized | 8.00 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.23 |
Bottlenecks | micro-operation queue |
Function | hypre_BoomerAMGBuildExtPIInterp.extracted |
Source | par_lr_interp.c:1221-1675 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 38.75 |
CQA cycles if no scalar integer | 18.75 |
CQA cycles if FP arith vectorized | 13.66 |
CQA cycles if fully vectorized | 4.84 |
Front-end cycles | 38.75 |
DIV/SQRT cycles | 4.00 |
P0 cycles | 20.25 |
P1 cycles | 20.25 |
P2 cycles | 31.50 |
P3 cycles | 31.50 |
P4 cycles | 6.00 |
P5 cycles | 20.25 |
P6 cycles | 20.25 |
P7 cycles | 6.00 |
Inter-iter dependencies cycles | NA |
FE+BE cycles (UFS) | 39.01 - 39.02 |
Stall cycles (UFS) | 0.00 |
Nb insns | 150.00 |
Nb uops | 152.00 |
Nb loads | 63.00 |
Nb stores | 6.00 |
Nb stack references | 19.00 |
FLOP/cycle | 0.10 |
Nb FLOP add-sub | 3.00 |
Nb FLOP mul | 0.00 |
Nb FLOP fma | 0.00 |
Nb FLOP div | 1.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 14.25 |
Bytes prefetched | 0.00 |
Bytes loaded | 504.00 |
Bytes stored | 48.00 |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | 6.52 |
Vectorization ratio load | 0.00 |
Vectorization ratio store | 0.00 |
Vectorization ratio mul | NA |
Vectorization ratio add_sub | 0.00 |
Vectorization ratio fma | NA |
Vectorization ratio div_sqrt | 0.00 |
Vectorization ratio other | 11.11 |
Vector-efficiency ratio all | 13.32 |
Vector-efficiency ratio load | 12.50 |
Vector-efficiency ratio store | 12.50 |
Vector-efficiency ratio mul | NA |
Vector-efficiency ratio add_sub | 12.50 |
Vector-efficiency ratio fma | NA |
Vector-efficiency ratio div_sqrt | 12.50 |
Vector-efficiency ratio other | 13.89 |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 2.07 |
CQA speedup if FP arith vectorized | 2.84 |
CQA speedup if fully vectorized | 8.00 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.23 |
Bottlenecks | micro-operation queue |
Function | hypre_BoomerAMGBuildExtPIInterp.extracted |
Source | par_lr_interp.c:1221-1675 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 38.75 |
CQA cycles if no scalar integer | 18.75 |
CQA cycles if FP arith vectorized | 13.66 |
CQA cycles if fully vectorized | 4.84 |
Front-end cycles | 38.75 |
DIV/SQRT cycles | 4.00 |
P0 cycles | 20.25 |
P1 cycles | 20.25 |
P2 cycles | 31.50 |
P3 cycles | 31.50 |
P4 cycles | 6.00 |
P5 cycles | 20.25 |
P6 cycles | 20.25 |
P7 cycles | 6.00 |
Inter-iter dependencies cycles | NA |
FE+BE cycles (UFS) | 39.01 - 39.02 |
Stall cycles (UFS) | 0.00 |
Nb insns | 150.00 |
Nb uops | 152.00 |
Nb loads | 63.00 |
Nb stores | 6.00 |
Nb stack references | 19.00 |
FLOP/cycle | 0.10 |
Nb FLOP add-sub | 3.00 |
Nb FLOP mul | 0.00 |
Nb FLOP fma | 0.00 |
Nb FLOP div | 1.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 14.25 |
Bytes prefetched | 0.00 |
Bytes loaded | 504.00 |
Bytes stored | 48.00 |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | 6.52 |
Vectorization ratio load | 0.00 |
Vectorization ratio store | 0.00 |
Vectorization ratio mul | NA |
Vectorization ratio add_sub | 0.00 |
Vectorization ratio fma | NA |
Vectorization ratio div_sqrt | 0.00 |
Vectorization ratio other | 11.11 |
Vector-efficiency ratio all | 13.32 |
Vector-efficiency ratio load | 12.50 |
Vector-efficiency ratio store | 12.50 |
Vector-efficiency ratio mul | NA |
Vector-efficiency ratio add_sub | 12.50 |
Vector-efficiency ratio fma | NA |
Vector-efficiency ratio div_sqrt | 12.50 |
Vector-efficiency ratio other | 13.89 |
Path / |
Function | hypre_BoomerAMGBuildExtPIInterp.extracted |
Source file and lines | par_lr_interp.c:1221-1675 |
Module | exec |
nb instructions | 150 |
nb uops | 152 |
loop length | 689 |
used x86 registers | 14 |
used mmx registers | 0 |
used xmm registers | 5 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 19 |
micro-operation queue | 38.75 cycles |
front end | 38.75 cycles |
Metric | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 |
---|---|---|---|---|---|---|---|---|
uops | 20.25 | 20.25 | 31.50 | 31.50 | 6.00 | 20.25 | 20.25 | 6.00 |
cycles | 20.25 | 20.25 | 31.50 | 31.50 | 6.00 | 20.25 | 20.25 | 6.00 |
Cycles executing div or sqrt instructions | 4.00 |
FE+BE cycles | 39.01-39.02 |
Stall cycles | 0.00 |
Front-end | 38.75 |
Dispatch | 31.50 |
DIV/SQRT | 4.00 |
Overall L1 | 38.75 |
all | 2% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 4% |
all | 18% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 50% |
all | 6% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 11% |
all | 12% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 13% |
all | 14% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 12% |
other | 18% |
all | 13% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 12% |
other | 13% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|
MOV -0xb8(%RBP),%RDX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV (%RDX),%RDX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD (%RDX,%RCX,8),%XMM5 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV -0x68(%RBP),%RSI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VADDSD (%RSI,%R13,8),%XMM5,%XMM5 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVSD %XMM5,(%RDX,%RCX,8) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV -0x30(%RBP),%RDX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV -0x38(%RBP),%R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
INC %R13 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CMP %RBX,%R13 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 45ba24 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV (%RAX,%R13,8),%RDI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV (%R14,%RDI,8),%RCX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
CMP %R11,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JGE 45b220 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP %R9,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JNE 45b340 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV -0x190(%RBP),%RDX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV (%RDX,%RDI,8),%R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VPXOR %XMM5,%XMM5,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV -0x68(%RBP),%RSI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VUCOMISD (%RSI,%R12,8),%XMM5 | 2 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %RDI,-0xa8(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x8(%RDX,%RDI,8),%R9 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV -0x30(%RBP),%RDX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
SETBE %CL | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 1 |
ADD %RCX,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
DEC %RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %RCX,-0x78(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
LEA 0x1(%R12),%R8 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %R9,%R8 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV -0xd8(%RBP),%RDI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
JGE 45b3a0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
VCVTSI2SDQ -0x78(%RBP),%XMM9,%XMM6 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 6 | 0.50 |
MOV %R12,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOT %R10 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
ADD %R9,%R10 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VXORPD %XMM5,%XMM5,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CMP $0x4,%R10 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JAE 45b640 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV %R10,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
AND $-0x4,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CMP %R10,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JAE 45b3a0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
ADD %R12,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
INC %RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JMP 45b326 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
CMPQ $-0x3,(%R8,%RDI,8) | 1 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
JE 45b242 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMPQ $0x1,-0x188(%RBP) | 1 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
JE 45b372 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV -0x180(%RBP),%RDX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV -0x30(%RBP),%RCX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV (%RDX,%RCX,8),%RCX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
CMP (%RDX,%RDI,8),%RCX | 1 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
MOV -0x30(%RBP),%RDX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
JNE 45b242 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV -0x68(%RBP),%RCX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VADDSD (%RCX,%R13,8),%XMM4,%XMM4 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
JMP 45b242 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
MOV -0xd0(%RBP),%RCX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV (%RCX),%RCX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RCX,-0xc8(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
CMP $0x2,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JL 45b460 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV -0x170(%RBP),%RCX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV -0xa8(%RBP),%RSI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV (%RCX,%RSI,8),%R10 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x8(%RCX,%RSI,8),%RCX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RCX,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
SUB %R10,%RBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JLE 45b460 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
VCVTSI2SDQ -0x78(%RBP),%XMM9,%XMM6 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 6 | 0.50 |
CMP $0x4,%RBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %RCX,-0x140(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RBX,-0x118(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
JAE 45b760 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV -0x118(%RBP),%RSI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RSI,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
AND $-0x4,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CMP %RSI,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV -0x140(%RBP),%RSI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
JAE 45b460 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
ADD %RCX,%R10 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JMP 45b42c | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
VUCOMISD %XMM0,%XMM5 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV -0x68(%RBP),%RCX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD (%RCX,%R13,8),%XMM6 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
JE 45b520 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
VDIVSD %XMM5,%XMM6,%XMM5 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-14 | 4 |
CMP %R9,%R8 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JGE 45b540 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
VCVTSI2SDQ -0x78(%RBP),%XMM9,%XMM6 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 6 | 0.50 |
MOV -0xf8(%RBP),%R10 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
JMP 45b4ac | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
VADDSD %XMM4,%XMM6,%XMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV -0x40(%RBP),%R8 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV -0xf8(%RBP),%R10 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
JMP 45b624 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
MOV -0xf8(%RBP),%R10 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
CMPQ $0x2,-0xc8(%RBP) | 1 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
JL 45b620 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV -0x170(%RBP),%RCX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV -0xa8(%RBP),%RSI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV (%RCX,%RSI,8),%RDX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x8(%RCX,%RSI,8),%R9 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %R9,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
SUB %RDX,%R8 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JLE 45b740 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
VCVTSI2SDQ -0x78(%RBP),%XMM9,%XMM6 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 6 | 0.50 |
CMP $0x4,%R8 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV -0x38(%RBP),%R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
JAE 45b8a0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV %R8,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
AND $-0x4,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CMP %R8,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JAE 45b9e0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
ADD %RCX,%RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV -0x40(%RBP),%R8 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV -0xa0(%RBP),%RBX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
JMP 45b5cc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
MOV -0x40(%RBP),%R8 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV -0x38(%RBP),%R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV -0x70(%RBP),%R9 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV -0xa0(%RBP),%RBX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
JMP 45b246 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
MOV %R10,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
SHR $0x2,%RCX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
LEA 0x20(,%R12,8),%RBX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
VXORPD %XMM5,%XMM5,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 45b68b | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
MOV -0x40(%RBP),%R8 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV -0x38(%RBP),%R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV -0x30(%RBP),%RDX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
JMP 45b628 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
SHR $0x2,%RBX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
LEA 0x18(,%R10,8),%RCX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV -0x38(%RBP),%R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
JMP 45b791 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
MOV %R8,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
SHR $0x2,%RCX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
LEA 0x18(,%RDX,8),%RBX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP 45b8cd | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
MOV -0x40(%RBP),%R8 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV -0x30(%RBP),%RDX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
JMP 45b628 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
MOV -0x30(%RBP),%RDX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV -0x70(%RBP),%R9 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
JMP 45b246 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
Function | hypre_BoomerAMGBuildExtPIInterp.extracted |
Source file and lines | par_lr_interp.c:1221-1675 |
Module | exec |
nb instructions | 150 |
nb uops | 152 |
loop length | 689 |
used x86 registers | 14 |
used mmx registers | 0 |
used xmm registers | 5 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 19 |
micro-operation queue | 38.75 cycles |
front end | 38.75 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | |
---|---|---|---|---|---|---|---|---|
uops | 20.25 | 20.25 | 31.50 | 31.50 | 6.00 | 20.25 | 20.25 | 6.00 |
cycles | 20.25 | 20.25 | 31.50 | 31.50 | 6.00 | 20.25 | 20.25 | 6.00 |
Cycles executing div or sqrt instructions | 4.00 |
FE+BE cycles | 39.01-39.02 |
Stall cycles | 0.00 |
Front-end | 38.75 |
Dispatch | 31.50 |
DIV/SQRT | 4.00 |
Overall L1 | 38.75 |
all | 2% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 4% |
all | 18% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 50% |
all | 6% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 11% |
all | 12% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 13% |
all | 14% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 12% |
other | 18% |
all | 13% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 12% |
other | 13% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|
MOV -0xb8(%RBP),%RDX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV (%RDX),%RDX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD (%RDX,%RCX,8),%XMM5 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV -0x68(%RBP),%RSI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VADDSD (%RSI,%R13,8),%XMM5,%XMM5 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMOVSD %XMM5,(%RDX,%RCX,8) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV -0x30(%RBP),%RDX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV -0x38(%RBP),%R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
INC %R13 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CMP %RBX,%R13 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 45ba24 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV (%RAX,%R13,8),%RDI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV (%R14,%RDI,8),%RCX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
CMP %R11,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JGE 45b220 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP %R9,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JNE 45b340 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV -0x190(%RBP),%RDX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV (%RDX,%RDI,8),%R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VPXOR %XMM5,%XMM5,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV -0x68(%RBP),%RSI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VUCOMISD (%RSI,%R12,8),%XMM5 | 2 | 1 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %RDI,-0xa8(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x8(%RDX,%RDI,8),%R9 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV -0x30(%RBP),%RDX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
SETBE %CL | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 1 |
ADD %RCX,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
DEC %RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %RCX,-0x78(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
LEA 0x1(%R12),%R8 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %R9,%R8 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV -0xd8(%RBP),%RDI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
JGE 45b3a0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
VCVTSI2SDQ -0x78(%RBP),%XMM9,%XMM6 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 6 | 0.50 |
MOV %R12,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOT %R10 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
ADD %R9,%R10 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VXORPD %XMM5,%XMM5,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CMP $0x4,%R10 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JAE 45b640 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV %R10,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
AND $-0x4,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CMP %R10,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JAE 45b3a0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
ADD %R12,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
INC %RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JMP 45b326 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
CMPQ $-0x3,(%R8,%RDI,8) | 1 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
JE 45b242 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMPQ $0x1,-0x188(%RBP) | 1 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
JE 45b372 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV -0x180(%RBP),%RDX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV -0x30(%RBP),%RCX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV (%RDX,%RCX,8),%RCX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
CMP (%RDX,%RDI,8),%RCX | 1 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
MOV -0x30(%RBP),%RDX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
JNE 45b242 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV -0x68(%RBP),%RCX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VADDSD (%RCX,%R13,8),%XMM4,%XMM4 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4 | 0.50 |
JMP 45b242 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
MOV -0xd0(%RBP),%RCX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV (%RCX),%RCX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RCX,-0xc8(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
CMP $0x2,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JL 45b460 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV -0x170(%RBP),%RCX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV -0xa8(%RBP),%RSI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV (%RCX,%RSI,8),%R10 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x8(%RCX,%RSI,8),%RCX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RCX,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
SUB %R10,%RBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JLE 45b460 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
VCVTSI2SDQ -0x78(%RBP),%XMM9,%XMM6 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 6 | 0.50 |
CMP $0x4,%RBX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %RCX,-0x140(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RBX,-0x118(%RBP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
JAE 45b760 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV -0x118(%RBP),%RSI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RSI,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
AND $-0x4,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CMP %RSI,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV -0x140(%RBP),%RSI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
JAE 45b460 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
ADD %RCX,%R10 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JMP 45b42c | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
VUCOMISD %XMM0,%XMM5 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV -0x68(%RBP),%RCX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
VMOVSD (%RCX,%R13,8),%XMM6 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
JE 45b520 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
VDIVSD %XMM5,%XMM6,%XMM5 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-14 | 4 |
CMP %R9,%R8 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JGE 45b540 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
VCVTSI2SDQ -0x78(%RBP),%XMM9,%XMM6 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 6 | 0.50 |
MOV -0xf8(%RBP),%R10 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
JMP 45b4ac | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
VADDSD %XMM4,%XMM6,%XMM4 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
MOV -0x40(%RBP),%R8 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV -0xf8(%RBP),%R10 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
JMP 45b624 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
MOV -0xf8(%RBP),%R10 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
CMPQ $0x2,-0xc8(%RBP) | 1 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
JL 45b620 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV -0x170(%RBP),%RCX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV -0xa8(%RBP),%RSI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV (%RCX,%RSI,8),%RDX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x8(%RCX,%RSI,8),%R9 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %R9,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
SUB %RDX,%R8 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JLE 45b740 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
VCVTSI2SDQ -0x78(%RBP),%XMM9,%XMM6 | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 6 | 0.50 |
CMP $0x4,%R8 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV -0x38(%RBP),%R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
JAE 45b8a0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV %R8,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
AND $-0x4,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CMP %R8,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JAE 45b9e0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
ADD %RCX,%RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV -0x40(%RBP),%R8 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV -0xa0(%RBP),%RBX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
JMP 45b5cc | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
MOV -0x40(%RBP),%R8 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV -0x38(%RBP),%R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV -0x70(%RBP),%R9 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV -0xa0(%RBP),%RBX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
JMP 45b246 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
MOV %R10,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
SHR $0x2,%RCX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
LEA 0x20(,%R12,8),%RBX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
VXORPD %XMM5,%XMM5,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 45b68b | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
MOV -0x40(%RBP),%R8 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV -0x38(%RBP),%R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV -0x30(%RBP),%RDX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
JMP 45b628 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
SHR $0x2,%RBX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
LEA 0x18(,%R10,8),%RCX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV -0x38(%RBP),%R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
JMP 45b791 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
MOV %R8,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
SHR $0x2,%RCX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
LEA 0x18(,%RDX,8),%RBX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP 45b8cd | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
MOV -0x40(%RBP),%R8 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV -0x30(%RBP),%RDX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
JMP 45b628 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
MOV -0x30(%RBP),%RDX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV -0x70(%RBP),%R9 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
JMP 45b246 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |