Loop Id: 963 | Module: exec | Source: par_multi_interp.c:1747-1876 [...] | Coverage: 0.13% |
---|
Loop Id: 963 | Module: exec | Source: par_multi_interp.c:1747-1876 [...] | Coverage: 0.13% |
---|
0x443470 MOV -0xa0(%RBP),%RDX |
0x443477 INC %RDX |
0x44347a CMP -0x58(%RBP),%RDX |
0x44347e JGE 443f1b |
0x443484 MOV -0x110(%RBP),%RAX |
0x44348b MOV %RDX,-0xa0(%RBP) |
0x443492 MOV (%RAX,%RDX,8),%R9 |
0x443496 MOV -0x118(%RBP),%RAX |
0x44349d MOV (%RAX,%R9,8),%R8 |
0x4434a1 MOV -0xa8(%RBP),%RAX |
0x4434a8 MOV (%RAX,%R9,8),%R12 |
0x4434ac MOV 0x8(%RAX,%R9,8),%RSI |
0x4434b1 LEA (%RSI,%R8,1),%RAX |
0x4434b5 SUB %R12,%RAX |
0x4434b8 CMP %RAX,%R8 |
0x4434bb MOV %R9,-0x48(%RBP) |
0x4434bf JGE 44369f |
0x4434c5 MOV -0x60(%RBP),%RAX |
0x4434c9 MOV (%RAX),%RCX |
0x4434cc MOV %RSI,%RAX |
0x4434cf SUB %R12,%RAX |
0x4434d2 CMP $0xd,%RAX |
0x4434d6 JB 443660 |
0x4434dc MOV %RSI,-0x98(%RBP) |
0x4434e3 VMOVUPD %XMM10,-0x70(%RBP) |
0x4434e8 LEA (%R14,%R12,8),%RDI |
0x4434ec LEA (,%RAX,8),%RDX |
0x4434f4 XOR %ESI,%ESI |
0x4434f6 MOV %R8,-0x40(%RBP) |
0x4434fa MOV %RAX,-0x88(%RBP) |
0x443501 MOV %RCX,-0x90(%RBP) |
0x443508 VZEROUPPER |
0x44350b CALL 4efbb0 <_intel_fast_memset> |
0x443510 MOV -0xb8(%RBP),%RAX |
0x443517 MOV -0x90(%RBP),%RCX |
0x44351e MOV (%RAX,%RCX,8),%RAX |
0x443522 MOV -0x88(%RBP),%RCX |
0x443529 SHR $0x3,%RCX |
0x44352d MOV -0x40(%RBP),%RDX |
0x443531 LEA 0x38(%RAX,%RDX,8),%RDX |
0x443536 MOV -0xf8(%RBP),%RSI |
0x44353d LEA (%RSI,%R12,8),%RSI |
0x443541 MOV %RCX,%RDI |
0x443544 XOR %R8D,%R8D |
0x443547 MOV -0x30(%RBP),%R11 |
0x44354b NOPL (%RAX,%RAX,1) |
(981) 0x443550 MOV -0x38(%RDX,%R8,8),%R9 |
(981) 0x443555 LEA (%R12,%R8,1),%R10 |
(981) 0x443559 MOV %R10,(%R11,%R9,8) |
(981) 0x44355d MOV %R9,-0x38(%RSI,%R8,8) |
(981) 0x443562 MOV -0x30(%RDX,%R8,8),%R9 |
(981) 0x443567 LEA 0x1(%R12,%R8,1),%R10 |
(981) 0x44356c MOV %R10,(%R11,%R9,8) |
(981) 0x443570 MOV %R9,-0x30(%RSI,%R8,8) |
(981) 0x443575 MOV -0x28(%RDX,%R8,8),%R9 |
(981) 0x44357a LEA 0x2(%R12,%R8,1),%R10 |
(981) 0x44357f MOV %R10,(%R11,%R9,8) |
(981) 0x443583 MOV %R9,-0x28(%RSI,%R8,8) |
(981) 0x443588 MOV -0x20(%RDX,%R8,8),%R9 |
(981) 0x44358d LEA 0x3(%R12,%R8,1),%R10 |
(981) 0x443592 MOV %R10,(%R11,%R9,8) |
(981) 0x443596 MOV %R9,-0x20(%RSI,%R8,8) |
(981) 0x44359b MOV -0x18(%RDX,%R8,8),%R9 |
(981) 0x4435a0 LEA 0x4(%R12,%R8,1),%R10 |
(981) 0x4435a5 MOV %R10,(%R11,%R9,8) |
(981) 0x4435a9 MOV %R9,-0x18(%RSI,%R8,8) |
(981) 0x4435ae MOV -0x10(%RDX,%R8,8),%R9 |
(981) 0x4435b3 LEA 0x5(%R12,%R8,1),%R10 |
(981) 0x4435b8 MOV %R10,(%R11,%R9,8) |
(981) 0x4435bc MOV %R9,-0x10(%RSI,%R8,8) |
(981) 0x4435c1 MOV -0x8(%RDX,%R8,8),%R9 |
(981) 0x4435c6 LEA 0x6(%R12,%R8,1),%R10 |
(981) 0x4435cb MOV %R10,(%R11,%R9,8) |
(981) 0x4435cf MOV %R9,-0x8(%RSI,%R8,8) |
(981) 0x4435d4 MOV (%RDX,%R8,8),%R9 |
(981) 0x4435d8 LEA 0x7(%R12,%R8,1),%R10 |
(981) 0x4435dd MOV %R10,(%R11,%R9,8) |
(981) 0x4435e1 MOV %R9,(%RSI,%R8,8) |
(981) 0x4435e5 ADD $0x8,%R8 |
(981) 0x4435e9 DEC %RDI |
(981) 0x4435ec JNE 443550 |
0x4435f2 MOV -0x88(%RBP),%RSI |
0x4435f9 MOV %RSI,%RDX |
0x4435fc AND $-0x8,%RDX |
0x443600 CMP %RSI,%RDX |
0x443603 MOV -0x38(%RBP),%R10 |
0x443607 MOV -0x50(%RBP),%RSI |
0x44360b VXORPD %XMM9,%XMM9,%XMM9 |
0x443610 MOV -0x48(%RBP),%R9 |
0x443614 VMOVUPD -0x70(%RBP),%XMM10 |
0x443619 MOV -0x98(%RBP),%RDI |
0x443620 MOV -0x40(%RBP),%R8 |
0x443624 JE 44369f |
0x443626 ADD %RDX,%R12 |
0x443629 SAL $0x6,%RCX |
0x44362d LEA (%RCX,%R8,8),%RCX |
0x443631 ADD %RCX,%RAX |
0x443634 NOPW %CS:(%RAX,%RAX,1) |
(982) 0x443640 MOV (%RAX),%RCX |
(982) 0x443643 MOV %R12,(%R11,%RCX,8) |
(982) 0x443647 MOV %RCX,(%RSI,%R12,8) |
(982) 0x44364b INC %R12 |
(982) 0x44364e ADD $0x8,%RAX |
(982) 0x443652 CMP %R12,%RDI |
(982) 0x443655 JNE 443640 |
0x443657 JMP 44369f |
0x443660 SAL $0x3,%R8 |
0x443664 MOV -0xb8(%RBP),%RAX |
0x44366b ADD (%RAX,%RCX,8),%R8 |
0x44366f MOV -0x50(%RBP),%RCX |
0x443673 MOV -0x30(%RBP),%RDX |
0x443677 NOPW (%RAX,%RAX,1) |
(980) 0x443680 MOV (%R8),%RAX |
(980) 0x443683 MOV %R12,(%RDX,%RAX,8) |
(980) 0x443687 MOVQ $0,(%R14,%R12,8) |
(980) 0x44368f MOV %RAX,(%RCX,%R12,8) |
(980) 0x443693 INC %R12 |
(980) 0x443696 ADD $0x8,%R8 |
(980) 0x44369a CMP %R12,%RSI |
(980) 0x44369d JNE 443680 |
0x44369f MOV -0x120(%RBP),%RAX |
0x4436a6 MOV (%RAX,%R9,8),%R8 |
0x4436aa MOV -0xb0(%RBP),%RAX |
0x4436b1 MOV (%RAX,%R9,8),%R12 |
0x4436b5 MOV 0x8(%RAX,%R9,8),%RCX |
0x4436ba LEA (%RCX,%R8,1),%RAX |
0x4436be SUB %R12,%RAX |
0x4436c1 CMP %RAX,%R8 |
0x4436c4 JGE 443880 |
0x4436ca MOV -0x60(%RBP),%RAX |
0x4436ce MOV (%RAX),%RDX |
0x4436d1 MOV %RCX,%RAX |
0x4436d4 SUB %R12,%RAX |
0x4436d7 CMP $0xd,%RAX |
0x4436db JB 443850 |
0x4436e1 MOV %RCX,-0x88(%RBP) |
0x4436e8 VMOVUPD %XMM10,-0x70(%RBP) |
0x4436ed LEA (%R13,%R12,8),%RDI |
0x4436f2 MOV %RDX,-0x90(%RBP) |
0x4436f9 LEA (,%RAX,8),%RDX |
0x443701 XOR %ESI,%ESI |
0x443703 MOV %R8,-0x98(%RBP) |
0x44370a MOV %RAX,-0x40(%RBP) |
0x44370e VZEROUPPER |
0x443711 CALL 4efbb0 <_intel_fast_memset> |
0x443716 MOV -0x98(%RBP),%R11 |
0x44371d MOV -0xc0(%RBP),%RAX |
0x443724 MOV -0x90(%RBP),%RCX |
0x44372b MOV (%RAX,%RCX,8),%RAX |
0x44372f MOV -0x40(%RBP),%RCX |
0x443733 SHR $0x3,%RCX |
0x443737 LEA 0x38(%RAX,%R11,8),%RDX |
0x44373c MOV -0xf0(%RBP),%RSI |
0x443743 LEA (%RSI,%R12,8),%RSI |
0x443747 MOV %RCX,%RDI |
0x44374a XOR %R8D,%R8D |
0x44374d NOPL (%RAX) |
(978) 0x443750 MOV -0x38(%RDX,%R8,8),%R9 |
(978) 0x443755 LEA (%R12,%R8,1),%R10 |
(978) 0x443759 MOV %R10,(%R15,%R9,8) |
(978) 0x44375d MOV %R9,-0x38(%RSI,%R8,8) |
(978) 0x443762 MOV -0x30(%RDX,%R8,8),%R9 |
(978) 0x443767 LEA 0x1(%R12,%R8,1),%R10 |
(978) 0x44376c MOV %R10,(%R15,%R9,8) |
(978) 0x443770 MOV %R9,-0x30(%RSI,%R8,8) |
(978) 0x443775 MOV -0x28(%RDX,%R8,8),%R9 |
(978) 0x44377a LEA 0x2(%R12,%R8,1),%R10 |
(978) 0x44377f MOV %R10,(%R15,%R9,8) |
(978) 0x443783 MOV %R9,-0x28(%RSI,%R8,8) |
(978) 0x443788 MOV -0x20(%RDX,%R8,8),%R9 |
(978) 0x44378d LEA 0x3(%R12,%R8,1),%R10 |
(978) 0x443792 MOV %R10,(%R15,%R9,8) |
(978) 0x443796 MOV %R9,-0x20(%RSI,%R8,8) |
(978) 0x44379b MOV -0x18(%RDX,%R8,8),%R9 |
(978) 0x4437a0 LEA 0x4(%R12,%R8,1),%R10 |
(978) 0x4437a5 MOV %R10,(%R15,%R9,8) |
(978) 0x4437a9 MOV %R9,-0x18(%RSI,%R8,8) |
(978) 0x4437ae MOV -0x10(%RDX,%R8,8),%R9 |
(978) 0x4437b3 LEA 0x5(%R12,%R8,1),%R10 |
(978) 0x4437b8 MOV %R10,(%R15,%R9,8) |
(978) 0x4437bc MOV %R9,-0x10(%RSI,%R8,8) |
(978) 0x4437c1 MOV -0x8(%RDX,%R8,8),%R9 |
(978) 0x4437c6 LEA 0x6(%R12,%R8,1),%R10 |
(978) 0x4437cb MOV %R10,(%R15,%R9,8) |
(978) 0x4437cf MOV %R9,-0x8(%RSI,%R8,8) |
(978) 0x4437d4 MOV (%RDX,%R8,8),%R9 |
(978) 0x4437d8 LEA 0x7(%R12,%R8,1),%R10 |
(978) 0x4437dd MOV %R10,(%R15,%R9,8) |
(978) 0x4437e1 MOV %R9,(%RSI,%R8,8) |
(978) 0x4437e5 ADD $0x8,%R8 |
(978) 0x4437e9 DEC %RDI |
(978) 0x4437ec JNE 443750 |
0x4437f2 MOV -0x40(%RBP),%RSI |
0x4437f6 MOV %RSI,%RDX |
0x4437f9 AND $-0x8,%RDX |
0x4437fd CMP %RSI,%RDX |
0x443800 MOV -0x38(%RBP),%R10 |
0x443804 VXORPD %XMM9,%XMM9,%XMM9 |
0x443809 MOV -0x48(%RBP),%R9 |
0x44380d VMOVUPD -0x70(%RBP),%XMM10 |
0x443812 MOV -0x88(%RBP),%RSI |
0x443819 JE 443880 |
0x44381b ADD %RDX,%R12 |
0x44381e SAL $0x6,%RCX |
0x443822 LEA (%RCX,%R11,8),%RCX |
0x443826 ADD %RCX,%RAX |
0x443829 NOPL (%RAX) |
(979) 0x443830 MOV (%RAX),%RCX |
(979) 0x443833 MOV %R12,(%R15,%RCX,8) |
(979) 0x443837 MOV %RCX,(%R10,%R12,8) |
(979) 0x44383b INC %R12 |
(979) 0x44383e ADD $0x8,%RAX |
(979) 0x443842 CMP %R12,%RSI |
(979) 0x443845 JNE 443830 |
0x443847 JMP 443880 |
0x443850 SAL $0x3,%R8 |
0x443854 MOV -0xc0(%RBP),%RAX |
0x44385b ADD (%RAX,%RDX,8),%R8 |
0x44385f NOP |
(977) 0x443860 MOV (%R8),%RAX |
(977) 0x443863 MOV %R12,(%R15,%RAX,8) |
(977) 0x443867 MOVQ $0,(%R13,%R12,8) |
(977) 0x443870 MOV %RAX,(%R10,%R12,8) |
(977) 0x443874 INC %R12 |
(977) 0x443877 ADD $0x8,%R8 |
(977) 0x44387b CMP %R12,%RCX |
(977) 0x44387e JNE 443860 |
0x443880 MOV -0xd8(%RBP),%RCX |
0x443887 MOV (%RCX,%R9,8),%RAX |
0x44388b MOV 0x8(%RCX,%R9,8),%RCX |
0x443890 CMP %RCX,%RAX |
0x443893 MOV -0x30(%RBP),%R12 |
0x443897 JGE 4438e0 |
0x443899 MOV -0x60(%RBP),%RDX |
0x44389d MOV (%RDX),%RDX |
0x4438a0 DEC %RDX |
0x4438a3 MOV -0x188(%RBP),%RDI |
0x4438aa JMP 4438b8 |
(976) 0x4438b0 INC %RAX |
(976) 0x4438b3 CMP %RCX,%RAX |
(976) 0x4438b6 JGE 4438e0 |
(976) 0x4438b8 MOV -0x178(%RBP),%RSI |
(976) 0x4438bf MOV (%RSI,%RAX,8),%RSI |
(976) 0x4438c3 CMP %RDX,(%RDI,%RSI,8) |
(976) 0x4438c7 JNE 4438b0 |
(976) 0x4438c9 MOV -0x80(%RBP),%RCX |
(976) 0x4438cd MOV %R9,(%RCX,%RSI,8) |
(976) 0x4438d1 MOV -0xd8(%RBP),%RCX |
(976) 0x4438d8 MOV 0x8(%RCX,%R9,8),%RCX |
(976) 0x4438dd JMP 4438b0 |
0x4438e0 MOV -0xe0(%RBP),%RCX |
0x4438e7 MOV (%RCX,%R9,8),%RAX |
0x4438eb MOV 0x8(%RCX,%R9,8),%RCX |
0x4438f0 CMP %RCX,%RAX |
0x4438f3 JGE 443940 |
0x4438f5 MOV -0x60(%RBP),%RDX |
0x4438f9 MOV (%RDX),%RDX |
0x4438fc DEC %RDX |
0x4438ff MOV -0x190(%RBP),%RDI |
0x443906 JMP 443918 |
(975) 0x443910 INC %RAX |
(975) 0x443913 CMP %RCX,%RAX |
(975) 0x443916 JGE 443940 |
(975) 0x443918 MOV -0x180(%RBP),%RSI |
(975) 0x44391f MOV (%RSI,%RAX,8),%RSI |
(975) 0x443923 CMP %RDX,(%RDI,%RSI,8) |
(975) 0x443927 JNE 443910 |
(975) 0x443929 MOV -0x78(%RBP),%RCX |
(975) 0x44392d MOV %R9,(%RCX,%RSI,8) |
(975) 0x443931 MOV -0xe0(%RBP),%RCX |
(975) 0x443938 MOV 0x8(%RCX,%R9,8),%RCX |
(975) 0x44393d JMP 443910 |
0x443940 MOV -0x100(%RBP),%RAX |
0x443947 MOV (%RAX,%R9,8),%RCX |
0x44394b MOV 0x8(%RAX,%R9,8),%R11 |
0x443950 MOV %RCX,%RAX |
0x443953 MOV %RCX,-0x40(%RBP) |
0x443957 LEA 0x1(%RCX),%RDX |
0x44395b VXORPD %XMM1,%XMM1,%XMM1 |
0x44395f VXORPD %XMM0,%XMM0,%XMM0 |
0x443963 CMP %R11,%RDX |
0x443966 JGE 443ca0 |
0x44396c MOV -0x50(%RBP),%RAX |
0x443970 MOV %R11,-0x70(%RBP) |
0x443974 MOV -0x30(%RBP),%RCX |
0x443978 JMP 443994 |
(970) 0x443980 MOV -0x50(%RBP),%RAX |
(970) 0x443984 MOV -0x48(%RBP),%R9 |
(970) 0x443988 INC %RDX |
(970) 0x44398b CMP %R11,%RDX |
(970) 0x44398e JE 443ca0 |
(970) 0x443994 MOV -0x168(%RBP),%RSI |
(970) 0x44399b MOV (%RSI,%RDX,8),%RSI |
(970) 0x44399f MOV -0x80(%RBP),%RDI |
(970) 0x4439a3 CMP %R9,(%RDI,%RSI,8) |
(970) 0x4439a7 JNE 4439d0 |
(970) 0x4439a9 MOV -0xa8(%RBP),%R8 |
(970) 0x4439b0 MOV (%R8,%RSI,8),%RDI |
(970) 0x4439b4 MOV 0x8(%R8,%RSI,8),%R8 |
(970) 0x4439b9 MOV %R8,%R9 |
(970) 0x4439bc SUB %RDI,%R9 |
(970) 0x4439bf JLE 443b1f |
(970) 0x4439c5 CMP $0x4,%R9 |
(970) 0x4439c9 JAE 443a00 |
(970) 0x4439cb JMP 443ac5 |
(970) 0x4439d0 MOV -0x150(%RBP),%RDI |
(970) 0x4439d7 CMPQ $-0x3,(%RDI,%RSI,8) |
(970) 0x4439dc JE 443988 |
(970) 0x4439de CMPQ $0x1,-0xe8(%RBP) |
(970) 0x4439e6 JE 4439f9 |
(970) 0x4439e8 MOV -0xc8(%RBP),%R8 |
(970) 0x4439ef MOV (%R8,%R9,8),%RDI |
(970) 0x4439f3 CMP (%R8,%RSI,8),%RDI |
(970) 0x4439f7 JNE 443988 |
(970) 0x4439f9 VADDSD (%RBX,%RDX,8),%XMM0,%XMM0 |
(970) 0x4439fe JMP 443988 |
(970) 0x443a00 MOV %R9,%R10 |
(970) 0x443a03 SHR $0x2,%R10 |
(970) 0x443a07 LEA 0x18(,%RDI,8),%R11 |
(970) 0x443a0f NOP |
(973) 0x443a10 MOV -0x18(%RAX,%R11,1),%R12 |
(973) 0x443a15 VMOVSD -0x18(%R14,%R11,1),%XMM2 |
(973) 0x443a1c VMOVSD (%RBX,%RDX,8),%XMM3 |
(973) 0x443a21 MOV (%RCX,%R12,8),%R12 |
(973) 0x443a25 VMOVSD (%R14,%R12,8),%XMM4 |
(973) 0x443a2b VFMADD231SD %XMM2,%XMM3,%XMM4 |
(973) 0x443a30 VMOVSD %XMM4,(%R14,%R12,8) |
(973) 0x443a36 MOV -0x10(%RAX,%R11,1),%R12 |
(973) 0x443a3b VMOVSD -0x10(%R14,%R11,1),%XMM4 |
(973) 0x443a42 VMOVSD (%RBX,%RDX,8),%XMM5 |
(973) 0x443a47 MOV (%RCX,%R12,8),%R12 |
(973) 0x443a4b VMOVSD (%R14,%R12,8),%XMM6 |
(973) 0x443a51 VFMADD231SD %XMM4,%XMM5,%XMM6 |
(973) 0x443a56 VMOVSD %XMM6,(%R14,%R12,8) |
(973) 0x443a5c MOV -0x8(%RAX,%R11,1),%R12 |
(973) 0x443a61 VMOVSD -0x8(%R14,%R11,1),%XMM6 |
(973) 0x443a68 VMOVSD (%RBX,%RDX,8),%XMM7 |
(973) 0x443a6d MOV (%RCX,%R12,8),%R12 |
(973) 0x443a71 VMOVSD (%R14,%R12,8),%XMM8 |
(973) 0x443a77 VFMADD231SD %XMM6,%XMM7,%XMM8 |
(973) 0x443a7c VMOVSD %XMM8,(%R14,%R12,8) |
(973) 0x443a82 VMOVSD (%R14,%R11,1),%XMM8 |
(973) 0x443a88 VMULSD (%RBX,%RDX,8),%XMM8,%XMM10 |
(973) 0x443a8d MOV (%RAX,%R11,1),%R12 |
(973) 0x443a91 MOV (%RCX,%R12,8),%R12 |
(973) 0x443a95 VADDSD (%R14,%R12,8),%XMM10,%XMM8 |
(973) 0x443a9b VMOVSD %XMM8,(%R14,%R12,8) |
(973) 0x443aa1 VFMADD213SD %XMM10,%XMM5,%XMM4 |
(973) 0x443aa6 VFMADD231SD %XMM2,%XMM3,%XMM4 |
(973) 0x443aab VFMADD231SD %XMM6,%XMM7,%XMM4 |
(973) 0x443ab0 VADDSD %XMM1,%XMM4,%XMM1 |
(973) 0x443ab4 VADDSD %XMM0,%XMM4,%XMM0 |
(973) 0x443ab8 ADD $0x20,%R11 |
(973) 0x443abc DEC %R10 |
(973) 0x443abf JNE 443a10 |
(970) 0x443ac5 MOV %R9,%R10 |
(970) 0x443ac8 AND $-0x4,%R10 |
(970) 0x443acc CMP %R9,%R10 |
(970) 0x443acf MOV -0x70(%RBP),%R11 |
(970) 0x443ad3 JNE 443adf |
(970) 0x443ad5 MOV -0x38(%RBP),%R10 |
(970) 0x443ad9 MOV -0x30(%RBP),%R12 |
(970) 0x443add JMP 443b1f |
(970) 0x443adf ADD %R10,%RDI |
(970) 0x443ae2 MOV -0x38(%RBP),%R10 |
(970) 0x443ae6 MOV -0x30(%RBP),%R12 |
(970) 0x443aea NOPW (%RAX,%RAX,1) |
(974) 0x443af0 MOV (%RAX,%RDI,8),%R9 |
(974) 0x443af4 VMOVSD (%R14,%RDI,8),%XMM2 |
(974) 0x443afa VMULSD (%RBX,%RDX,8),%XMM2,%XMM10 |
(974) 0x443aff MOV (%R12,%R9,8),%R9 |
(974) 0x443b03 VADDSD (%R14,%R9,8),%XMM10,%XMM2 |
(974) 0x443b09 VMOVSD %XMM2,(%R14,%R9,8) |
(974) 0x443b0f VADDSD %XMM1,%XMM10,%XMM1 |
(974) 0x443b13 VADDSD %XMM0,%XMM10,%XMM0 |
(974) 0x443b17 INC %RDI |
(974) 0x443b1a CMP %RDI,%R8 |
(974) 0x443b1d JNE 443af0 |
(970) 0x443b1f MOV -0xb0(%RBP),%RAX |
(970) 0x443b26 MOV (%RAX,%RSI,8),%RDI |
(970) 0x443b2a MOV 0x8(%RAX,%RSI,8),%RSI |
(970) 0x443b2f MOV %RSI,%R8 |
(970) 0x443b32 SUB %RDI,%R8 |
(970) 0x443b35 JLE 443980 |
(970) 0x443b3b CMP $0x4,%R8 |
(970) 0x443b3f JAE 443b46 |
(970) 0x443b41 JMP 443c1e |
(970) 0x443b46 MOV %R8,%R9 |
(970) 0x443b49 SHR $0x2,%R9 |
(970) 0x443b4d LEA 0x18(,%RDI,8),%R10 |
(970) 0x443b55 MOV -0x38(%RBP),%RAX |
(970) 0x443b59 NOPL (%RAX) |
(971) 0x443b60 MOV -0x18(%RAX,%R10,1),%R11 |
(971) 0x443b65 VMOVSD -0x18(%R13,%R10,1),%XMM2 |
(971) 0x443b6c VMOVSD (%RBX,%RDX,8),%XMM3 |
(971) 0x443b71 MOV (%R15,%R11,8),%R11 |
(971) 0x443b75 VMOVSD (%R13,%R11,8),%XMM4 |
(971) 0x443b7c VFMADD231SD %XMM2,%XMM3,%XMM4 |
(971) 0x443b81 VMOVSD %XMM4,(%R13,%R11,8) |
(971) 0x443b88 MOV -0x10(%RAX,%R10,1),%R11 |
(971) 0x443b8d VMOVSD -0x10(%R13,%R10,1),%XMM4 |
(971) 0x443b94 VMOVSD (%RBX,%RDX,8),%XMM5 |
(971) 0x443b99 MOV (%R15,%R11,8),%R11 |
(971) 0x443b9d VMOVSD (%R13,%R11,8),%XMM6 |
(971) 0x443ba4 VFMADD231SD %XMM4,%XMM5,%XMM6 |
(971) 0x443ba9 VMOVSD %XMM6,(%R13,%R11,8) |
(971) 0x443bb0 MOV -0x8(%RAX,%R10,1),%R11 |
(971) 0x443bb5 VMOVSD -0x8(%R13,%R10,1),%XMM6 |
(971) 0x443bbc VMOVSD (%RBX,%RDX,8),%XMM7 |
(971) 0x443bc1 MOV (%R15,%R11,8),%R11 |
(971) 0x443bc5 VMOVSD (%R13,%R11,8),%XMM8 |
(971) 0x443bcc VFMADD231SD %XMM6,%XMM7,%XMM8 |
(971) 0x443bd1 VMOVSD %XMM8,(%R13,%R11,8) |
(971) 0x443bd8 VMOVSD (%R13,%R10,1),%XMM8 |
(971) 0x443bdf VMULSD (%RBX,%RDX,8),%XMM8,%XMM10 |
(971) 0x443be4 MOV (%RAX,%R10,1),%R11 |
(971) 0x443be8 MOV (%R15,%R11,8),%R11 |
(971) 0x443bec VADDSD (%R13,%R11,8),%XMM10,%XMM8 |
(971) 0x443bf3 VMOVSD %XMM8,(%R13,%R11,8) |
(971) 0x443bfa VFMADD213SD %XMM10,%XMM5,%XMM4 |
(971) 0x443bff VFMADD231SD %XMM2,%XMM3,%XMM4 |
(971) 0x443c04 VFMADD231SD %XMM6,%XMM7,%XMM4 |
(971) 0x443c09 VADDSD %XMM1,%XMM4,%XMM1 |
(971) 0x443c0d VADDSD %XMM0,%XMM4,%XMM0 |
(971) 0x443c11 ADD $0x20,%R10 |
(971) 0x443c15 DEC %R9 |
(971) 0x443c18 JNE 443b60 |
(970) 0x443c1e MOV %R8,%R9 |
(970) 0x443c21 AND $-0x4,%R9 |
(970) 0x443c25 CMP %R8,%R9 |
(970) 0x443c28 JNE 443c3f |
(970) 0x443c2a MOV -0x38(%RBP),%R10 |
(970) 0x443c2e MOV -0x50(%RBP),%RAX |
(970) 0x443c32 MOV -0x48(%RBP),%R9 |
(970) 0x443c36 MOV -0x70(%RBP),%R11 |
(970) 0x443c3a JMP 443988 |
(970) 0x443c3f ADD %R9,%RDI |
(970) 0x443c42 MOV -0x38(%RBP),%R10 |
(970) 0x443c46 MOV -0x50(%RBP),%RAX |
(970) 0x443c4a MOV -0x48(%RBP),%R9 |
(970) 0x443c4e MOV -0x70(%RBP),%R11 |
(970) 0x443c52 NOPW %CS:(%RAX,%RAX,1) |
(972) 0x443c60 MOV (%R10,%RDI,8),%R8 |
(972) 0x443c64 VMOVSD (%R13,%RDI,8),%XMM2 |
(972) 0x443c6b VMULSD (%RBX,%RDX,8),%XMM2,%XMM10 |
(972) 0x443c70 MOV (%R15,%R8,8),%R8 |
(972) 0x443c74 VADDSD (%R13,%R8,8),%XMM10,%XMM2 |
(972) 0x443c7b VMOVSD %XMM2,(%R13,%R8,8) |
(972) 0x443c82 VADDSD %XMM1,%XMM10,%XMM1 |
(972) 0x443c86 VADDSD %XMM0,%XMM10,%XMM0 |
(972) 0x443c8a INC %RDI |
(972) 0x443c8d CMP %RDI,%RSI |
(972) 0x443c90 JNE 443c60 |
(970) 0x443c92 JMP 443988 |
0x443ca0 MOV -0x108(%RBP),%RAX |
0x443ca7 MOV (%RAX,%R9,8),%RCX |
0x443cab MOV 0x8(%RAX,%R9,8),%RDX |
0x443cb0 CMP %RDX,%RCX |
0x443cb3 JL 443d5a |
0x443cb9 MOV -0x40(%RBP),%RAX |
0x443cbd VMULSD (%RBX,%RAX,8),%XMM1,%XMM1 |
0x443cc2 VUCOMISD %XMM9,%XMM1 |
0x443cc7 JE 443cd7 |
0x443cc9 VXORPD 0xbb40d(%RIP){1to2},%XMM0,%XMM0 |
0x443cd3 VDIVSD %XMM1,%XMM0,%XMM10 |
0x443cd7 MOV -0xa8(%RBP),%RAX |
0x443cde MOV (%RAX,%R9,8),%RDX |
0x443ce2 MOV 0x8(%RAX,%R9,8),%RAX |
0x443ce7 MOV %RAX,%RSI |
0x443cea SUB %RDX,%RSI |
0x443ced JLE 443e94 |
0x443cf3 MOV %RSI,%RCX |
0x443cf6 AND $-0x4,%RCX |
0x443cfa JE 443e7a |
0x443d00 LEA -0x1(%RCX),%RDI |
0x443d04 VBROADCASTSD %XMM10,%YMM0 |
0x443d09 LEA (%R14,%RDX,8),%R8 |
0x443d0d XOR %R9D,%R9D |
(967) 0x443d10 VMULPD (%R8,%R9,8),%YMM0,%YMM1 |
(967) 0x443d16 VMOVUPD %YMM1,(%R8,%R9,8) |
(967) 0x443d1c ADD $0x4,%R9 |
(967) 0x443d20 CMP %RDI,%R9 |
(967) 0x443d23 JBE 443d10 |
0x443d25 CMP %RCX,%RSI |
0x443d28 MOV -0x48(%RBP),%R9 |
0x443d2c JNE 443e7c |
0x443d32 JMP 443e94 |
(968) 0x443d40 MOV %R12,%R11 |
(968) 0x443d43 INC %RCX |
(968) 0x443d46 CMP %RDX,%RCX |
(968) 0x443d49 MOV -0x38(%RBP),%R10 |
(968) 0x443d4d MOV %R11,%R12 |
(968) 0x443d50 MOV -0x48(%RBP),%R9 |
(968) 0x443d54 JE 443cb9 |
(968) 0x443d5a MOV -0x170(%RBP),%RAX |
(968) 0x443d61 LEA (%RAX,%RCX,8),%RSI |
(968) 0x443d65 CMPQ $0,-0x198(%RBP) |
(968) 0x443d6d JE 443d7d |
(968) 0x443d6f MOV (%RSI),%RSI |
(968) 0x443d72 MOV -0x158(%RBP),%RDI |
(968) 0x443d79 LEA (%RDI,%RSI,8),%RSI |
(968) 0x443d7d MOV (%RSI),%RDI |
(968) 0x443d80 TEST %RDI,%RDI |
(968) 0x443d83 JS 443e30 |
(968) 0x443d89 MOV -0x78(%RBP),%RSI |
(968) 0x443d8d CMP %R9,(%RSI,%RDI,8) |
(968) 0x443d91 JNE 443e30 |
(968) 0x443d97 MOV -0x148(%RBP),%RSI |
(968) 0x443d9e MOV 0x8(%RSI,%RDI,8),%RSI |
(968) 0x443da3 TEST %RSI,%RSI |
(968) 0x443da6 JLE 443e71 |
(968) 0x443dac MOV -0x130(%RBP),%R8 |
(968) 0x443db3 MOV (%R8,%RDI,8),%RDI |
(968) 0x443db7 ADD %RDI,%RSI |
(968) 0x443dba MOV -0x60(%RBP),%R8 |
(968) 0x443dbe MOV (%R8),%R8 |
(968) 0x443dc1 MOV -0x138(%RBP),%R9 |
(968) 0x443dc8 MOV (%R9,%R8,8),%R8 |
(968) 0x443dcc MOV -0xd0(%RBP),%R12 |
(968) 0x443dd3 MOV -0x30(%RBP),%R11 |
(968) 0x443dd7 MOV -0x140(%RBP),%RAX |
(968) 0x443dde XCHG %AX,%AX |
(969) 0x443de0 MOV (%R8,%RDI,8),%R9 |
(969) 0x443de4 VMOVSD (%RAX,%RDI,8),%XMM2 |
(969) 0x443de9 VMULSD (%R12,%RCX,8),%XMM2,%XMM10 |
(969) 0x443def TEST %R9,%R9 |
(969) 0x443df2 LEA (%R15,%R9,8),%R10 |
(969) 0x443df6 NOT %R9 |
(969) 0x443df9 LEA (%R11,%R9,8),%R9 |
(969) 0x443dfd CMOVNS %R10,%R9 |
(969) 0x443e01 MOV %R13,%R10 |
(969) 0x443e04 CMOVS %R14,%R10 |
(969) 0x443e08 MOV (%R9),%R9 |
(969) 0x443e0b VADDSD (%R10,%R9,8),%XMM10,%XMM2 |
(969) 0x443e11 VMOVSD %XMM2,(%R10,%R9,8) |
(969) 0x443e17 VADDSD %XMM1,%XMM10,%XMM1 |
(969) 0x443e1b VADDSD %XMM0,%XMM10,%XMM0 |
(969) 0x443e1f INC %RDI |
(969) 0x443e22 CMP %RSI,%RDI |
(969) 0x443e25 JL 443de0 |
(968) 0x443e27 JMP 443d43 |
(968) 0x443e30 MOV -0x160(%RBP),%RSI |
(968) 0x443e37 CMPQ $-0x3,(%RSI,%RDI,8) |
(968) 0x443e3c JE 443d40 |
(968) 0x443e42 CMPQ $0x1,-0xe8(%RBP) |
(968) 0x443e4a MOV -0xd0(%RBP),%R8 |
(968) 0x443e51 JE 443e6b |
(968) 0x443e53 MOV -0x128(%RBP),%RSI |
(968) 0x443e5a MOV (%RSI,%RDI,8),%RSI |
(968) 0x443e5e MOV -0xc8(%RBP),%RDI |
(968) 0x443e65 CMP (%RDI,%R9,8),%RSI |
(968) 0x443e69 JNE 443e71 |
(968) 0x443e6b VADDSD (%R8,%RCX,8),%XMM0,%XMM0 |
(968) 0x443e71 MOV -0x30(%RBP),%R11 |
(968) 0x443e75 JMP 443d43 |
0x443e7a XOR %ECX,%ECX |
0x443e7c ADD %RDX,%RCX |
0x443e7f NOP |
(966) 0x443e80 VMULSD (%R14,%RCX,8),%XMM10,%XMM0 |
(966) 0x443e86 VMOVSD %XMM0,(%R14,%RCX,8) |
(966) 0x443e8c INC %RCX |
(966) 0x443e8f CMP %RCX,%RAX |
(966) 0x443e92 JNE 443e80 |
0x443e94 MOV -0xb0(%RBP),%RAX |
0x443e9b MOV (%RAX,%R9,8),%RDX |
0x443e9f MOV 0x8(%RAX,%R9,8),%RAX |
0x443ea4 MOV %RAX,%RSI |
0x443ea7 SUB %RDX,%RSI |
0x443eaa JLE 443470 |
0x443eb0 MOV %RSI,%RCX |
0x443eb3 AND $-0x4,%RCX |
0x443eb7 JE 443ef0 |
0x443eb9 LEA -0x1(%RCX),%RDI |
0x443ebd VBROADCASTSD %XMM10,%YMM0 |
0x443ec2 LEA (%R13,%RDX,8),%R8 |
0x443ec7 XOR %R9D,%R9D |
0x443eca NOPW (%RAX,%RAX,1) |
(965) 0x443ed0 VMULPD (%R8,%R9,8),%YMM0,%YMM1 |
(965) 0x443ed6 VMOVUPD %YMM1,(%R8,%R9,8) |
(965) 0x443edc ADD $0x4,%R9 |
(965) 0x443ee0 CMP %RDI,%R9 |
(965) 0x443ee3 JBE 443ed0 |
0x443ee5 CMP %RCX,%RSI |
0x443ee8 JE 443470 |
0x443eee JMP 443ef2 |
0x443ef0 XOR %ECX,%ECX |
0x443ef2 ADD %RDX,%RCX |
0x443ef5 NOPW %CS:(%RAX,%RAX,1) |
(964) 0x443f00 VMULSD (%R13,%RCX,8),%XMM10,%XMM0 |
(964) 0x443f07 VMOVSD %XMM0,(%R13,%RCX,8) |
(964) 0x443f0e INC %RCX |
(964) 0x443f11 CMP %RCX,%RAX |
(964) 0x443f14 JNE 443f00 |
0x443f16 JMP 443470 |
/scratch_na/users/xoserete/qaas_runs/171-415-3872/intel/AMG/build/AMG/AMG/parcsr_ls/par_multi_interp.c: 1747 - 1876 |
-------------------------------------------------------------------------------- |
1747: if (n_fine) |
[...] |
1774: for (i=thread_start; i < thread_stop; i++) |
1775: { |
1776: i1 = pass_array[i]; |
1777: sum_C = 0; |
1778: sum_N = 0; |
1779: j_start = P_diag_start[i1]; |
1780: j_end = j_start+P_diag_i[i1+1]-P_diag_i[i1]; |
1781: cnt = P_diag_i[i1]; |
1782: for (j=j_start; j < j_end; j++) |
1783: { |
1784: k1 = P_diag_pass[pass][j]; |
1785: tmp_array[k1] = cnt; |
1786: P_diag_data[cnt] = 0; |
1787: P_diag_j[cnt++] = k1; |
1788: } |
1789: j_start = P_offd_start[i1]; |
1790: j_end = j_start+P_offd_i[i1+1]-P_offd_i[i1]; |
1791: cnt_offd = P_offd_i[i1]; |
1792: for (j=j_start; j < j_end; j++) |
1793: { |
1794: k1 = P_offd_pass[pass][j]; |
1795: tmp_array_offd[k1] = cnt_offd; |
1796: P_offd_data[cnt_offd] = 0; |
1797: P_offd_j[cnt_offd++] = k1; |
1798: } |
1799: for (j=S_diag_i[i1]; j < S_diag_i[i1+1]; j++) |
1800: { |
1801: j1 = S_diag_j[j]; |
1802: if (assigned[j1] == pass-1) |
1803: tmp_marker[j1] = i1; |
1804: } |
1805: for (j=S_offd_i[i1]; j < S_offd_i[i1+1]; j++) |
1806: { |
1807: j1 = S_offd_j[j]; |
1808: if (assigned_offd[j1] == pass-1) |
1809: tmp_marker_offd[j1] = i1; |
1810: } |
1811: for (j=A_diag_i[i1]+1; j < A_diag_i[i1+1]; j++) |
1812: { |
1813: j1 = A_diag_j[j]; |
1814: if (tmp_marker[j1] == i1) |
1815: { |
1816: for (k=P_diag_i[j1]; k < P_diag_i[j1+1]; k++) |
1817: { |
1818: k1 = P_diag_j[k]; |
1819: alfa = A_diag_data[j]*P_diag_data[k]; |
1820: P_diag_data[tmp_array[k1]] += alfa; |
1821: sum_C += alfa; |
1822: sum_N += alfa; |
1823: } |
1824: for (k=P_offd_i[j1]; k < P_offd_i[j1+1]; k++) |
1825: { |
1826: k1 = P_offd_j[k]; |
1827: alfa = A_diag_data[j]*P_offd_data[k]; |
1828: P_offd_data[tmp_array_offd[k1]] += alfa; |
1829: sum_C += alfa; |
1830: sum_N += alfa; |
1831: } |
1832: } |
1833: else |
1834: { |
1835: if (CF_marker[j1] != -3 && |
1836: (num_functions == 1 || dof_func[i1] == dof_func[j1])) |
1837: sum_N += A_diag_data[j]; |
1838: } |
1839: } |
1840: for (j=A_offd_i[i1]; j < A_offd_i[i1+1]; j++) |
1841: { |
1842: if (col_offd_S_to_A) |
1843: j1 = map_A_to_S[A_offd_j[j]]; |
1844: else |
1845: j1 = A_offd_j[j]; |
1846: |
1847: if (j1 > -1 && tmp_marker_offd[j1] == i1) |
1848: { |
1849: j_start = Pext_start[j1]; |
1850: j_end = j_start+Pext_i[j1+1]; |
1851: for (k=j_start; k < j_end; k++) |
1852: { |
1853: k1 = Pext_pass[pass][k]; |
1854: alfa = A_offd_data[j]*Pext_data[k]; |
1855: if (k1 < 0) |
1856: P_diag_data[tmp_array[-k1-1]] += alfa; |
1857: else |
1858: P_offd_data[tmp_array_offd[k1]] += alfa; |
1859: sum_C += alfa; |
1860: sum_N += alfa; |
1861: } |
1862: } |
1863: else |
1864: { |
1865: if (CF_marker_offd[j1] != -3 && |
1866: (num_functions == 1 || dof_func_offd[j1] == dof_func[i1])) |
1867: sum_N += A_offd_data[j]; |
1868: } |
1869: } |
1870: diagonal = A_diag_data[A_diag_i[i1]]; |
1871: if (sum_C*diagonal) alfa = -sum_N/(sum_C*diagonal); |
1872: |
1873: for (j=P_diag_i[i1]; j < P_diag_i[i1+1]; j++) |
1874: P_diag_data[j] *= alfa; |
1875: for (j=P_offd_i[i1]; j < P_offd_i[i1+1]; j++) |
1876: P_offd_data[j] *= alfa; |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►100.00+ | __kmp_invoke_microtask | libiomp5.so | |
○ | __kmp_invoke_task_func | libiomp5.so |
Path / |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 2.57 |
CQA speedup if FP arith vectorized | 3.44 |
CQA speedup if fully vectorized | 7.75 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.38 |
Bottlenecks | micro-operation queue, |
Function | hypre_BoomerAMGBuildMultipass.extracted.28 |
Source | par_multi_interp.c:1747-1747,par_multi_interp.c:1774-1776,par_multi_interp.c:1779-1786,par_multi_interp.c:1789-1796,par_multi_interp.c:1799-1799,par_multi_interp.c:1802-1802,par_multi_interp.c:1805-1805,par_multi_interp.c:1808-1808,par_multi_interp.c:1811-1811,par_multi_interp.c:1824-1824,par_multi_interp.c:1840-1840,par_multi_interp.c:1867-1867,par_multi_interp.c:1871-1876 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 36.83 |
CQA cycles if no scalar integer | 14.33 |
CQA cycles if FP arith vectorized | 10.70 |
CQA cycles if fully vectorized | 4.75 |
Front-end cycles | 36.83 |
DIV/SQRT cycles | 4.00 |
P0 cycles | 13.00 |
P1 cycles | 12.40 |
P2 cycles | 26.67 |
P3 cycles | 26.67 |
P4 cycles | 8.00 |
P5 cycles | 12.40 |
P6 cycles | 13.00 |
P7 cycles | 8.00 |
P8 cycles | 8.00 |
P9 cycles | 8.00 |
P10 cycles | 12.20 |
P11 cycles | 26.67 |
Inter-iter dependencies cycles | NA |
FE+BE cycles (UFS) | 36.50 - 36.51 |
Stall cycles (UFS) | 0.00 |
Nb insns | 214.00 |
Nb uops | 218.00 |
Nb loads | 80.00 |
Nb stores | 14.00 |
Nb stack references | 27.00 |
FLOP/cycle | 0.05 |
Nb FLOP add-sub | 0.00 |
Nb FLOP mul | 1.00 |
Nb FLOP fma | 0.00 |
Nb FLOP div | 1.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 21.29 |
Bytes prefetched | 0.00 |
Bytes loaded | 656.00 |
Bytes stored | 128.00 |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | 22.92 |
Vectorization ratio load | 30.00 |
Vectorization ratio store | 14.29 |
Vectorization ratio mul | 0.00 |
Vectorization ratio add_sub | NA |
Vectorization ratio fma | NA |
Vectorization ratio div_sqrt | 0.00 |
Vectorization ratio other | 28.00 |
Vector-efficiency ratio all | 15.36 |
Vector-efficiency ratio load | 16.25 |
Vector-efficiency ratio store | 14.29 |
Vector-efficiency ratio mul | 12.50 |
Vector-efficiency ratio add_sub | NA |
Vector-efficiency ratio fma | NA |
Vector-efficiency ratio div_sqrt | 12.50 |
Vector-efficiency ratio other | 16.00 |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 2.57 |
CQA speedup if FP arith vectorized | 3.44 |
CQA speedup if fully vectorized | 7.75 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.38 |
Bottlenecks | micro-operation queue, |
Function | hypre_BoomerAMGBuildMultipass.extracted.28 |
Source | par_multi_interp.c:1747-1747,par_multi_interp.c:1774-1776,par_multi_interp.c:1779-1786,par_multi_interp.c:1789-1796,par_multi_interp.c:1799-1799,par_multi_interp.c:1802-1802,par_multi_interp.c:1805-1805,par_multi_interp.c:1808-1808,par_multi_interp.c:1811-1811,par_multi_interp.c:1824-1824,par_multi_interp.c:1840-1840,par_multi_interp.c:1867-1867,par_multi_interp.c:1871-1876 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 36.83 |
CQA cycles if no scalar integer | 14.33 |
CQA cycles if FP arith vectorized | 10.70 |
CQA cycles if fully vectorized | 4.75 |
Front-end cycles | 36.83 |
DIV/SQRT cycles | 4.00 |
P0 cycles | 13.00 |
P1 cycles | 12.40 |
P2 cycles | 26.67 |
P3 cycles | 26.67 |
P4 cycles | 8.00 |
P5 cycles | 12.40 |
P6 cycles | 13.00 |
P7 cycles | 8.00 |
P8 cycles | 8.00 |
P9 cycles | 8.00 |
P10 cycles | 12.20 |
P11 cycles | 26.67 |
Inter-iter dependencies cycles | NA |
FE+BE cycles (UFS) | 36.50 - 36.51 |
Stall cycles (UFS) | 0.00 |
Nb insns | 214.00 |
Nb uops | 218.00 |
Nb loads | 80.00 |
Nb stores | 14.00 |
Nb stack references | 27.00 |
FLOP/cycle | 0.05 |
Nb FLOP add-sub | 0.00 |
Nb FLOP mul | 1.00 |
Nb FLOP fma | 0.00 |
Nb FLOP div | 1.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 21.29 |
Bytes prefetched | 0.00 |
Bytes loaded | 656.00 |
Bytes stored | 128.00 |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | 22.92 |
Vectorization ratio load | 30.00 |
Vectorization ratio store | 14.29 |
Vectorization ratio mul | 0.00 |
Vectorization ratio add_sub | NA |
Vectorization ratio fma | NA |
Vectorization ratio div_sqrt | 0.00 |
Vectorization ratio other | 28.00 |
Vector-efficiency ratio all | 15.36 |
Vector-efficiency ratio load | 16.25 |
Vector-efficiency ratio store | 14.29 |
Vector-efficiency ratio mul | 12.50 |
Vector-efficiency ratio add_sub | NA |
Vector-efficiency ratio fma | NA |
Vector-efficiency ratio div_sqrt | 12.50 |
Vector-efficiency ratio other | 16.00 |
Path / |
Function | hypre_BoomerAMGBuildMultipass.extracted.28 |
Source file and lines | par_multi_interp.c:1747-1876 |
Module | exec |
nb instructions | 214 |
nb uops | 218 |
loop length | 963 |
used x86 registers | 14 |
used mmx registers | 0 |
used xmm registers | 4 |
used ymm registers | 1 |
used zmm registers | 0 |
nb stack references | 27 |
micro-operation queue | 36.83 cycles |
front end | 36.83 cycles |
| | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 13.00 | 12.40 | 26.67 | 26.67 | 8.00 | 12.40 | 13.00 | 8.00 | 8.00 | 8.00 | 12.20 | 26.67 |
cycles | 13.00 | 12.40 | 26.67 | 26.67 | 8.00 | 12.40 | 13.00 | 8.00 | 8.00 | 8.00 | 12.20 | 26.67 |
Cycles executing div or sqrt instructions | 4.00 |
FE+BE cycles | 36.50-36.51 |
Stall cycles | 0.00 |
Front-end | 36.83 |
Dispatch | 26.67 |
DIV/SQRT | 4.00 |
Overall L1 | 36.83 |
all | 5% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 11% |
all | 64% |
load | 75% |
store | 100% |
mul | 0% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 62% |
all | 22% |
load | 30% |
store | 14% |
mul | 0% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 28% |
all | 13% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 13% |
all | 20% |
load | 21% |
store | 25% |
mul | 12% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 12% |
other | 20% |
all | 15% |
load | 16% |
store | 14% |
mul | 12% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 12% |
other | 16% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
MOV -0xa0(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
INC %RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP -0x58(%RBP),%RDX | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JGE 443f1b <hypre_BoomerAMGBuildMultipass.extracted.28+0xdfb> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x110(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RDX,-0xa0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RAX,%RDX,8),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x118(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%R9,8),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0xa8(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%R9,8),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX,%R9,8),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RSI,%R8,1),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SUB %R12,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %RAX,%R8 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R9,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JGE 44369f <hypre_BoomerAMGBuildMultipass.extracted.28+0x57f> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x60(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RSI,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %R12,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP $0xd,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB 443660 <hypre_BoomerAMGBuildMultipass.extracted.28+0x540> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RSI,-0x98(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVUPD %XMM10,-0x70(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
LEA (%R14,%R12,8),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA (,%RAX,8),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %ESI,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R8,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RAX,-0x88(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RCX,-0x90(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 4efbb0 <_intel_fast_memset> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV -0xb8(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x90(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%RCX,8),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x88(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SHR $0x3,%RCX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV -0x40(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x38(%RAX,%RDX,8),%RDX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV -0xf8(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RSI,%R12,8),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RCX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %R8D,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x30(%RBP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x88(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RSI,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x8,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
CMP %RSI,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x38(%RBP),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x50(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VXORPD %XMM9,%XMM9,%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x48(%RBP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVUPD -0x70(%RBP),%XMM10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
MOV -0x98(%RBP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x40(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JE 44369f <hypre_BoomerAMGBuildMultipass.extracted.28+0x57f> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
ADD %RDX,%R12 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
SAL $0x6,%RCX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
LEA (%RCX,%R8,8),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD %RCX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 44369f <hypre_BoomerAMGBuildMultipass.extracted.28+0x57f> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
SAL $0x3,%R8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV -0xb8(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD (%RAX,%RCX,8),%R8 | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
MOV -0x50(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x30(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x120(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%R9,8),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0xb0(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%R9,8),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX,%R9,8),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RCX,%R8,1),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SUB %R12,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %RAX,%R8 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 443880 <hypre_BoomerAMGBuildMultipass.extracted.28+0x760> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x60(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %R12,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP $0xd,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB 443850 <hypre_BoomerAMGBuildMultipass.extracted.28+0x730> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RCX,-0x88(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVUPD %XMM10,-0x70(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
LEA (%R13,%R12,8),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RDX,-0x90(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (,%RAX,8),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %ESI,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R8,-0x98(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RAX,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 4efbb0 <_intel_fast_memset> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV -0x98(%RBP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0xc0(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x90(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%RCX,8),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x40(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SHR $0x3,%RCX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
LEA 0x38(%RAX,%R11,8),%RDX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV -0xf0(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RSI,%R12,8),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RCX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %R8D,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x40(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RSI,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x8,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
CMP %RSI,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x38(%RBP),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VXORPD %XMM9,%XMM9,%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x48(%RBP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVUPD -0x70(%RBP),%XMM10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
MOV -0x88(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JE 443880 <hypre_BoomerAMGBuildMultipass.extracted.28+0x760> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
ADD %RDX,%R12 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
SAL $0x6,%RCX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
LEA (%RCX,%R11,8),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD %RCX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 443880 <hypre_BoomerAMGBuildMultipass.extracted.28+0x760> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
SAL $0x3,%R8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV -0xc0(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD (%RAX,%RDX,8),%R8 | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0xd8(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RCX,%R9,8),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RCX,%R9,8),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %RCX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x30(%RBP),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JGE 4438e0 <hypre_BoomerAMGBuildMultipass.extracted.28+0x7c0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x60(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RDX),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
DEC %RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV -0x188(%RBP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JMP 4438b8 <hypre_BoomerAMGBuildMultipass.extracted.28+0x798> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
MOV -0xe0(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RCX,%R9,8),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RCX,%R9,8),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %RCX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 443940 <hypre_BoomerAMGBuildMultipass.extracted.28+0x820> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x60(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RDX),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
DEC %RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV -0x190(%RBP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JMP 443918 <hypre_BoomerAMGBuildMultipass.extracted.28+0x7f8> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
MOV -0x100(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%R9,8),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX,%R9,8),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RCX,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA 0x1(%RCX),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VXORPD %XMM1,%XMM1,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %R11,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 443ca0 <hypre_BoomerAMGBuildMultipass.extracted.28+0xb80> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x50(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R11,-0x70(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV -0x30(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JMP 443994 <hypre_BoomerAMGBuildMultipass.extracted.28+0x874> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
MOV -0x108(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%R9,8),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX,%R9,8),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %RDX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JL 443d5a <hypre_BoomerAMGBuildMultipass.extracted.28+0xc3a> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x40(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMULSD (%RBX,%RAX,8),%XMM1,%XMM1 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VUCOMISD %XMM9,%XMM1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
JE 443cd7 <hypre_BoomerAMGBuildMultipass.extracted.28+0xbb7> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VXORPD 0xbb40d(%RIP){1to2},%XMM0,%XMM0 | 1 | 0.33 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VDIVSD %XMM1,%XMM0,%XMM10 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-15 | 4 |
MOV -0xa8(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%R9,8),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX,%R9,8),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %RDX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JLE 443e94 <hypre_BoomerAMGBuildMultipass.extracted.28+0xd74> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RSI,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x4,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
JE 443e7a <hypre_BoomerAMGBuildMultipass.extracted.28+0xd5a> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA -0x1(%RCX),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VBROADCASTSD %XMM10,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%R14,%RDX,8),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %R9D,%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %RCX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x48(%RBP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JNE 443e7c <hypre_BoomerAMGBuildMultipass.extracted.28+0xd5c> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
JMP 443e94 <hypre_BoomerAMGBuildMultipass.extracted.28+0xd74> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD %RDX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0xb0(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%R9,8),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX,%R9,8),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %RDX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JLE 443470 <hypre_BoomerAMGBuildMultipass.extracted.28+0x350> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RSI,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x4,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
JE 443ef0 <hypre_BoomerAMGBuildMultipass.extracted.28+0xdd0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA -0x1(%RCX),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VBROADCASTSD %XMM10,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%R13,%RDX,8),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %R9D,%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %RCX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 443470 <hypre_BoomerAMGBuildMultipass.extracted.28+0x350> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
JMP 443ef2 <hypre_BoomerAMGBuildMultipass.extracted.28+0xdd2> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD %RDX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 443470 <hypre_BoomerAMGBuildMultipass.extracted.28+0x350> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
Function | hypre_BoomerAMGBuildMultipass.extracted.28 |
Source file and lines | par_multi_interp.c:1747-1876 |
Module | exec |
nb instructions | 214 |
nb uops | 218 |
loop length | 963 |
used x86 registers | 14 |
used mmx registers | 0 |
used xmm registers | 4 |
used ymm registers | 1 |
used zmm registers | 0 |
nb stack references | 27 |
micro-operation queue | 36.83 cycles |
front end | 36.83 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 13.00 | 12.40 | 26.67 | 26.67 | 8.00 | 12.40 | 13.00 | 8.00 | 8.00 | 8.00 | 12.20 | 26.67 |
cycles | 13.00 | 12.40 | 26.67 | 26.67 | 8.00 | 12.40 | 13.00 | 8.00 | 8.00 | 8.00 | 12.20 | 26.67 |
Cycles executing div or sqrt instructions | 4.00 |
FE+BE cycles | 36.50-36.51 |
Stall cycles | 0.00 |
Front-end | 36.83 |
Dispatch | 26.67 |
DIV/SQRT | 4.00 |
Overall L1 | 36.83 |
all | 5% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 11% |
all | 64% |
load | 75% |
store | 100% |
mul | 0% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 62% |
all | 22% |
load | 30% |
store | 14% |
mul | 0% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 28% |
all | 13% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 13% |
all | 20% |
load | 21% |
store | 25% |
mul | 12% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 12% |
other | 20% |
all | 15% |
load | 16% |
store | 14% |
mul | 12% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 12% |
other | 16% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
MOV -0xa0(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
INC %RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP -0x58(%RBP),%RDX | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JGE 443f1b <hypre_BoomerAMGBuildMultipass.extracted.28+0xdfb> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x110(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RDX,-0xa0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RAX,%RDX,8),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x118(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%R9,8),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0xa8(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%R9,8),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX,%R9,8),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RSI,%R8,1),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SUB %R12,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %RAX,%R8 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R9,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JGE 44369f <hypre_BoomerAMGBuildMultipass.extracted.28+0x57f> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x60(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RSI,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %R12,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP $0xd,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB 443660 <hypre_BoomerAMGBuildMultipass.extracted.28+0x540> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RSI,-0x98(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVUPD %XMM10,-0x70(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
LEA (%R14,%R12,8),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA (,%RAX,8),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %ESI,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R8,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RAX,-0x88(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RCX,-0x90(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 4efbb0 <_intel_fast_memset> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV -0xb8(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x90(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%RCX,8),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x88(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SHR $0x3,%RCX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV -0x40(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x38(%RAX,%RDX,8),%RDX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV -0xf8(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RSI,%R12,8),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RCX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %R8D,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x30(%RBP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x88(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RSI,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x8,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
CMP %RSI,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x38(%RBP),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x50(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VXORPD %XMM9,%XMM9,%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x48(%RBP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVUPD -0x70(%RBP),%XMM10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
MOV -0x98(%RBP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x40(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JE 44369f <hypre_BoomerAMGBuildMultipass.extracted.28+0x57f> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
ADD %RDX,%R12 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
SAL $0x6,%RCX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
LEA (%RCX,%R8,8),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD %RCX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 44369f <hypre_BoomerAMGBuildMultipass.extracted.28+0x57f> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
SAL $0x3,%R8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV -0xb8(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD (%RAX,%RCX,8),%R8 | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
MOV -0x50(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x30(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x120(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%R9,8),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0xb0(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%R9,8),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX,%R9,8),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RCX,%R8,1),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SUB %R12,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %RAX,%R8 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 443880 <hypre_BoomerAMGBuildMultipass.extracted.28+0x760> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x60(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %R12,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP $0xd,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB 443850 <hypre_BoomerAMGBuildMultipass.extracted.28+0x730> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RCX,-0x88(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVUPD %XMM10,-0x70(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
LEA (%R13,%R12,8),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RDX,-0x90(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (,%RAX,8),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %ESI,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R8,-0x98(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RAX,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 4efbb0 <_intel_fast_memset> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV -0x98(%RBP),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0xc0(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x90(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%RCX,8),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x40(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SHR $0x3,%RCX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
LEA 0x38(%RAX,%R11,8),%RDX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV -0xf0(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RSI,%R12,8),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RCX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %R8D,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x40(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RSI,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x8,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
CMP %RSI,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x38(%RBP),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VXORPD %XMM9,%XMM9,%XMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x48(%RBP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVUPD -0x70(%RBP),%XMM10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
MOV -0x88(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JE 443880 <hypre_BoomerAMGBuildMultipass.extracted.28+0x760> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
ADD %RDX,%R12 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
SAL $0x6,%RCX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
LEA (%RCX,%R11,8),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD %RCX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 443880 <hypre_BoomerAMGBuildMultipass.extracted.28+0x760> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
SAL $0x3,%R8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV -0xc0(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD (%RAX,%RDX,8),%R8 | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0xd8(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RCX,%R9,8),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RCX,%R9,8),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %RCX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x30(%RBP),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JGE 4438e0 <hypre_BoomerAMGBuildMultipass.extracted.28+0x7c0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x60(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RDX),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
DEC %RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV -0x188(%RBP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JMP 4438b8 <hypre_BoomerAMGBuildMultipass.extracted.28+0x798> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
MOV -0xe0(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RCX,%R9,8),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RCX,%R9,8),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %RCX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 443940 <hypre_BoomerAMGBuildMultipass.extracted.28+0x820> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x60(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RDX),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
DEC %RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV -0x190(%RBP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JMP 443918 <hypre_BoomerAMGBuildMultipass.extracted.28+0x7f8> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
MOV -0x100(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%R9,8),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX,%R9,8),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RCX,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA 0x1(%RCX),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VXORPD %XMM1,%XMM1,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VXORPD %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %R11,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 443ca0 <hypre_BoomerAMGBuildMultipass.extracted.28+0xb80> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x50(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R11,-0x70(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV -0x30(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JMP 443994 <hypre_BoomerAMGBuildMultipass.extracted.28+0x874> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
MOV -0x108(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%R9,8),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX,%R9,8),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %RDX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JL 443d5a <hypre_BoomerAMGBuildMultipass.extracted.28+0xc3a> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x40(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMULSD (%RBX,%RAX,8),%XMM1,%XMM1 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VUCOMISD %XMM9,%XMM1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
JE 443cd7 <hypre_BoomerAMGBuildMultipass.extracted.28+0xbb7> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VXORPD 0xbb40d(%RIP){1to2},%XMM0,%XMM0 | 1 | 0.33 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VDIVSD %XMM1,%XMM0,%XMM10 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-15 | 4 |
MOV -0xa8(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%R9,8),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX,%R9,8),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %RDX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JLE 443e94 <hypre_BoomerAMGBuildMultipass.extracted.28+0xd74> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RSI,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x4,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
JE 443e7a <hypre_BoomerAMGBuildMultipass.extracted.28+0xd5a> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA -0x1(%RCX),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VBROADCASTSD %XMM10,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%R14,%RDX,8),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %R9D,%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %RCX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x48(%RBP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JNE 443e7c <hypre_BoomerAMGBuildMultipass.extracted.28+0xd5c> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
JMP 443e94 <hypre_BoomerAMGBuildMultipass.extracted.28+0xd74> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD %RDX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0xb0(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX,%R9,8),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RAX,%R9,8),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RAX,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %RDX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JLE 443470 <hypre_BoomerAMGBuildMultipass.extracted.28+0x350> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RSI,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x4,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
JE 443ef0 <hypre_BoomerAMGBuildMultipass.extracted.28+0xdd0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA -0x1(%RCX),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VBROADCASTSD %XMM10,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%R13,%RDX,8),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %R9D,%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %RCX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 443470 <hypre_BoomerAMGBuildMultipass.extracted.28+0x350> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
JMP 443ef2 <hypre_BoomerAMGBuildMultipass.extracted.28+0xdd2> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD %RDX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 443470 <hypre_BoomerAMGBuildMultipass.extracted.28+0x350> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |