Loop Id: 85 | Module: libparcsr_mv.so | Source: par_csr_matop.c:109-242 [...] | Coverage: 0.03% |
---|
Loop Id: 85 | Module: libparcsr_mv.so | Source: par_csr_matop.c:109-242 [...] | Coverage: 0.03% |
---|
0xd980 MOV -0xa0(%RBP),%RAX |
0xd987 MOV -0x68(%RBP),%RCX |
0xd98b MOV %R8,(%RAX,%RCX,8) |
0xd98f MOV -0x98(%RBP),%RAX |
0xd996 MOV %R10,(%RAX,%RCX,8) |
0xd99a MOV -0xc0(%RBP),%RCX |
0xd9a1 LEA 0x1(%RCX),%RDI |
0xd9a5 MOV %RDX,%R10 |
0xd9a8 MOV %RBX,%R8 |
0xd9ab CMP -0xa8(%RBP),%RCX |
0xd9b2 JE d7da |
0xd9b8 CMPQ $0,0x78(%RBP) |
0xd9bd MOV -0xb0(%RBP),%RCX |
0xd9c4 JE d9d2 |
0xd9c6 LEA (%RCX,%RDI,1),%RAX |
0xd9ca MOV %R8,(%R14,%RAX,8) |
0xd9ce LEA 0x1(%R8),%RBX |
0xd9d2 ADD %RDI,%RCX |
0xd9d5 CMPQ $0,0x70(%RBP) |
0xd9da MOV %RDI,-0xc0(%RBP) |
0xd9e1 MOV %RCX,-0x68(%RBP) |
0xd9e5 JE dda0 |
0xd9eb MOV 0x10(%RBP),%RAX |
0xd9ef MOV (%RAX,%RCX,8),%R13 |
0xd9f3 MOV 0x8(%RAX,%RCX,8),%RDX |
0xd9f8 LEA 0x1(%RCX),%R9 |
0xd9fc CMP %RDX,%R13 |
0xd9ff JGE dda4 |
0xda05 MOV %R9,-0x90(%RBP) |
0xda0c MOV %R13,%RAX |
0xda0f NOT %RAX |
0xda12 ADD %RAX,%RDX |
0xda15 MOV %RDX,-0x38(%RBP) |
0xda19 XOR %R9D,%R9D |
0xda1c MOV %R10,%RDX |
0xda1f MOV %R13,-0x30(%RBP) |
0xda23 JMP da46 |
(94) 0xda30 MOV -0x30(%RBP),%R13 |
(94) 0xda34 MOV -0x60(%RBP),%RCX |
(94) 0xda38 LEA 0x1(%RCX),%R9 |
(94) 0xda3c CMP -0x38(%RBP),%RCX |
(94) 0xda40 JE ddb0 |
(94) 0xda46 LEA (%R9,%R13,1),%RAX |
(94) 0xda4a MOV 0x18(%RBP),%RCX |
(94) 0xda4e MOV (%RCX,%RAX,8),%RAX |
(94) 0xda52 MOV 0x50(%RBP),%RCX |
(94) 0xda56 MOV (%RCX,%RAX,8),%R13 |
(94) 0xda5a MOV 0x8(%RCX,%RAX,8),%RDI |
(94) 0xda5f MOV %RDI,%R11 |
(94) 0xda62 SUB %R13,%R11 |
(94) 0xda65 MOV %R9,-0x60(%RBP) |
(94) 0xda69 JLE dab0 |
(94) 0xda6b CMP $0x8,%R11 |
(94) 0xda6f JAE db10 |
(94) 0xda75 MOV %R11,%RCX |
(94) 0xda78 AND $-0x8,%RCX |
(94) 0xda7c CMP %R11,%RCX |
(94) 0xda7f JAE dab0 |
(94) 0xda81 ADD %RCX,%R13 |
(94) 0xda84 MOV 0x58(%RBP),%R9 |
(94) 0xda88 JMP da98 |
(97) 0xda90 INC %R13 |
(97) 0xda93 CMP %R13,%RDI |
(97) 0xda96 JE dab0 |
(97) 0xda98 MOV (%R9,%R13,8),%RCX |
(97) 0xda9c ADD %R15,%RCX |
(97) 0xda9f CMP %R10,(%R14,%RCX,8) |
(97) 0xdaa3 JGE da90 |
(97) 0xdaa5 MOV %RDX,(%R14,%RCX,8) |
(97) 0xdaa9 INC %RDX |
(97) 0xdaac JMP da90 |
(94) 0xdab0 MOV 0x40(%RBP),%RCX |
(94) 0xdab4 MOV (%RCX,%RAX,8),%RDI |
(94) 0xdab8 MOV 0x8(%RCX,%RAX,8),%RAX |
(94) 0xdabd MOV %RAX,%R9 |
(94) 0xdac0 SUB %RDI,%R9 |
(94) 0xdac3 JLE da30 |
(94) 0xdac9 CMP $0x8,%R9 |
(94) 0xdacd JAE dc90 |
(94) 0xdad3 MOV -0x30(%RBP),%R13 |
(94) 0xdad7 MOV %R9,%RCX |
(94) 0xdada AND $-0x8,%RCX |
(94) 0xdade CMP %R9,%RCX |
(94) 0xdae1 JAE da34 |
(94) 0xdae7 ADD %RCX,%RDI |
(94) 0xdaea MOV 0x48(%RBP),%R9 |
(94) 0xdaee JMP dafc |
(95) 0xdaf0 INC %RDI |
(95) 0xdaf3 CMP %RDI,%RAX |
(95) 0xdaf6 JE da34 |
(95) 0xdafc MOV (%R9,%RDI,8),%RCX |
(95) 0xdb00 CMP %R8,(%R14,%RCX,8) |
(95) 0xdb04 JGE daf0 |
(95) 0xdb06 MOV %RBX,(%R14,%RCX,8) |
(95) 0xdb0a INC %RBX |
(95) 0xdb0d JMP daf0 |
(94) 0xdb10 MOV %R11,%R9 |
(94) 0xdb13 SHR $0x3,%R9 |
(94) 0xdb17 MOV -0xd8(%RBP),%RCX |
(94) 0xdb1e LEA (%RCX,%R13,8),%RCX |
(94) 0xdb22 JMP db3d |
(98) 0xdb30 ADD $0x40,%RCX |
(98) 0xdb34 DEC %R9 |
(98) 0xdb37 JE da75 |
(98) 0xdb3d MOV -0x38(%RCX),%R12 |
(98) 0xdb41 ADD %R15,%R12 |
(98) 0xdb44 CMP %R10,(%R14,%R12,8) |
(98) 0xdb48 JGE dbf0 |
(98) 0xdb4e MOV %RDX,(%R14,%R12,8) |
(98) 0xdb52 INC %RDX |
(98) 0xdb55 MOV -0x30(%RCX),%R12 |
(98) 0xdb59 ADD %R15,%R12 |
(98) 0xdb5c CMP %R10,(%R14,%R12,8) |
(98) 0xdb60 JL dc01 |
(98) 0xdb66 MOV -0x28(%RCX),%R12 |
(98) 0xdb6a ADD %R15,%R12 |
(98) 0xdb6d CMP %R10,(%R14,%R12,8) |
(98) 0xdb71 JGE dc19 |
(98) 0xdb77 MOV %RDX,(%R14,%R12,8) |
(98) 0xdb7b INC %RDX |
(98) 0xdb7e MOV -0x20(%RCX),%R12 |
(98) 0xdb82 ADD %R15,%R12 |
(98) 0xdb85 CMP %R10,(%R14,%R12,8) |
(98) 0xdb89 JL dc2a |
(98) 0xdb8f MOV -0x18(%RCX),%R12 |
(98) 0xdb93 ADD %R15,%R12 |
(98) 0xdb96 CMP %R10,(%R14,%R12,8) |
(98) 0xdb9a JGE dc42 |
(98) 0xdba0 MOV %RDX,(%R14,%R12,8) |
(98) 0xdba4 INC %RDX |
(98) 0xdba7 MOV -0x10(%RCX),%R12 |
(98) 0xdbab ADD %R15,%R12 |
(98) 0xdbae CMP %R10,(%R14,%R12,8) |
(98) 0xdbb2 JL dc53 |
(98) 0xdbb8 MOV -0x8(%RCX),%R12 |
(98) 0xdbbc ADD %R15,%R12 |
(98) 0xdbbf CMP %R10,(%R14,%R12,8) |
(98) 0xdbc3 JGE dc6b |
(98) 0xdbc9 MOV %RDX,(%R14,%R12,8) |
(98) 0xdbcd INC %RDX |
(98) 0xdbd0 MOV (%RCX),%R12 |
(98) 0xdbd3 ADD %R15,%R12 |
(98) 0xdbd6 CMP %R10,(%R14,%R12,8) |
(98) 0xdbda JGE db30 |
(98) 0xdbe0 JMP dc7b |
(98) 0xdbf0 MOV -0x30(%RCX),%R12 |
(98) 0xdbf4 ADD %R15,%R12 |
(98) 0xdbf7 CMP %R10,(%R14,%R12,8) |
(98) 0xdbfb JGE db66 |
(98) 0xdc01 MOV %RDX,(%R14,%R12,8) |
(98) 0xdc05 INC %RDX |
(98) 0xdc08 MOV -0x28(%RCX),%R12 |
(98) 0xdc0c ADD %R15,%R12 |
(98) 0xdc0f CMP %R10,(%R14,%R12,8) |
(98) 0xdc13 JL db77 |
(98) 0xdc19 MOV -0x20(%RCX),%R12 |
(98) 0xdc1d ADD %R15,%R12 |
(98) 0xdc20 CMP %R10,(%R14,%R12,8) |
(98) 0xdc24 JGE db8f |
(98) 0xdc2a MOV %RDX,(%R14,%R12,8) |
(98) 0xdc2e INC %RDX |
(98) 0xdc31 MOV -0x18(%RCX),%R12 |
(98) 0xdc35 ADD %R15,%R12 |
(98) 0xdc38 CMP %R10,(%R14,%R12,8) |
(98) 0xdc3c JL dba0 |
(98) 0xdc42 MOV -0x10(%RCX),%R12 |
(98) 0xdc46 ADD %R15,%R12 |
(98) 0xdc49 CMP %R10,(%R14,%R12,8) |
(98) 0xdc4d JGE dbb8 |
(98) 0xdc53 MOV %RDX,(%R14,%R12,8) |
(98) 0xdc57 INC %RDX |
(98) 0xdc5a MOV -0x8(%RCX),%R12 |
(98) 0xdc5e ADD %R15,%R12 |
(98) 0xdc61 CMP %R10,(%R14,%R12,8) |
(98) 0xdc65 JL dbc9 |
(98) 0xdc6b MOV (%RCX),%R12 |
(98) 0xdc6e ADD %R15,%R12 |
(98) 0xdc71 CMP %R10,(%R14,%R12,8) |
(98) 0xdc75 JGE db30 |
(98) 0xdc7b MOV %RDX,(%R14,%R12,8) |
(98) 0xdc7f INC %RDX |
(98) 0xdc82 JMP db30 |
(94) 0xdc90 MOV %R9,%RCX |
(94) 0xdc93 SHR $0x3,%RCX |
(94) 0xdc97 MOV -0xd0(%RBP),%R11 |
(94) 0xdc9e LEA (%R11,%RDI,8),%R11 |
(94) 0xdca2 MOV -0x30(%RBP),%R13 |
(94) 0xdca6 JMP dcbd |
(96) 0xdcb0 ADD $0x40,%R11 |
(96) 0xdcb4 DEC %RCX |
(96) 0xdcb7 JE dad7 |
(96) 0xdcbd MOV -0x38(%R11),%R12 |
(96) 0xdcc1 CMP %R8,(%R14,%R12,8) |
(96) 0xdcc5 JGE dd30 |
(96) 0xdcc7 MOV %RBX,(%R14,%R12,8) |
(96) 0xdccb INC %RBX |
(96) 0xdcce MOV -0x30(%R11),%R12 |
(96) 0xdcd2 CMP %R8,(%R14,%R12,8) |
(96) 0xdcd6 JL dd3a |
(96) 0xdcd8 MOV -0x28(%R11),%R12 |
(96) 0xdcdc CMP %R8,(%R14,%R12,8) |
(96) 0xdce0 JGE dd4b |
(96) 0xdce2 MOV %RBX,(%R14,%R12,8) |
(96) 0xdce6 INC %RBX |
(96) 0xdce9 MOV -0x20(%R11),%R12 |
(96) 0xdced CMP %R8,(%R14,%R12,8) |
(96) 0xdcf1 JL dd55 |
(96) 0xdcf3 MOV -0x18(%R11),%R12 |
(96) 0xdcf7 CMP %R8,(%R14,%R12,8) |
(96) 0xdcfb JGE dd66 |
(96) 0xdcfd MOV %RBX,(%R14,%R12,8) |
(96) 0xdd01 INC %RBX |
(96) 0xdd04 MOV -0x10(%R11),%R12 |
(96) 0xdd08 CMP %R8,(%R14,%R12,8) |
(96) 0xdd0c JL dd70 |
(96) 0xdd0e MOV -0x8(%R11),%R12 |
(96) 0xdd12 CMP %R8,(%R14,%R12,8) |
(96) 0xdd16 JGE dd81 |
(96) 0xdd18 MOV %RBX,(%R14,%R12,8) |
(96) 0xdd1c INC %RBX |
(96) 0xdd1f MOV (%R11),%R12 |
(96) 0xdd22 CMP %R8,(%R14,%R12,8) |
(96) 0xdd26 JGE dcb0 |
(96) 0xdd28 JMP dd8e |
(96) 0xdd30 MOV -0x30(%R11),%R12 |
(96) 0xdd34 CMP %R8,(%R14,%R12,8) |
(96) 0xdd38 JGE dcd8 |
(96) 0xdd3a MOV %RBX,(%R14,%R12,8) |
(96) 0xdd3e INC %RBX |
(96) 0xdd41 MOV -0x28(%R11),%R12 |
(96) 0xdd45 CMP %R8,(%R14,%R12,8) |
(96) 0xdd49 JL dce2 |
(96) 0xdd4b MOV -0x20(%R11),%R12 |
(96) 0xdd4f CMP %R8,(%R14,%R12,8) |
(96) 0xdd53 JGE dcf3 |
(96) 0xdd55 MOV %RBX,(%R14,%R12,8) |
(96) 0xdd59 INC %RBX |
(96) 0xdd5c MOV -0x18(%R11),%R12 |
(96) 0xdd60 CMP %R8,(%R14,%R12,8) |
(96) 0xdd64 JL dcfd |
(96) 0xdd66 MOV -0x10(%R11),%R12 |
(96) 0xdd6a CMP %R8,(%R14,%R12,8) |
(96) 0xdd6e JGE dd0e |
(96) 0xdd70 MOV %RBX,(%R14,%R12,8) |
(96) 0xdd74 INC %RBX |
(96) 0xdd77 MOV -0x8(%R11),%R12 |
(96) 0xdd7b CMP %R8,(%R14,%R12,8) |
(96) 0xdd7f JL dd18 |
(96) 0xdd81 MOV (%R11),%R12 |
(96) 0xdd84 CMP %R8,(%R14,%R12,8) |
(96) 0xdd88 JGE dcb0 |
(96) 0xdd8e MOV %RBX,(%R14,%R12,8) |
(96) 0xdd92 INC %RBX |
(96) 0xdd95 JMP dcb0 |
0xdda0 LEA 0x1(%RCX),%R9 |
0xdda4 MOV %R10,%RDX |
0xdda7 JMP ddbb |
0xddb0 MOV -0x68(%RBP),%RCX |
0xddb4 MOV -0x90(%RBP),%R9 |
0xddbb MOV -0xb8(%RBP),%RAX |
0xddc2 MOV (%RAX,%RCX,8),%R11 |
0xddc6 MOV (%RAX,%R9,8),%R9 |
0xddca CMP %R9,%R11 |
0xddcd JGE d980 |
0xddd3 MOV %R11,%RAX |
0xddd6 NOT %RAX |
0xddd9 ADD %RAX,%R9 |
0xdddc MOV %R9,-0x38(%RBP) |
0xdde0 CMPQ $0,0x88(%RBP) |
0xdde8 MOV %R11,-0x30(%RBP) |
0xddec JE e09a |
0xddf2 XOR %EAX,%EAX |
0xddf4 JMP de16 |
(89) 0xde00 MOV -0x30(%RBP),%R11 |
(89) 0xde04 MOV -0x60(%RBP),%RCX |
(89) 0xde08 LEA 0x1(%RCX),%RAX |
(89) 0xde0c CMP -0x38(%RBP),%RCX |
(89) 0xde10 JE d980 |
(89) 0xde16 MOV %RAX,-0x60(%RBP) |
(89) 0xde1a ADD %R11,%RAX |
(89) 0xde1d MOV -0x78(%RBP),%RCX |
(89) 0xde21 MOV (%RCX,%RAX,8),%RDI |
(89) 0xde25 MOV 0x20(%RBP),%RCX |
(89) 0xde29 MOV (%RCX,%RDI,8),%RAX |
(89) 0xde2d MOV 0x8(%RCX,%RDI,8),%R13 |
(89) 0xde32 MOV %R13,%R9 |
(89) 0xde35 SUB %RAX,%R9 |
(89) 0xde38 JLE dfa4 |
(89) 0xde3e CMP $0x8,%R9 |
(89) 0xde42 JAE de90 |
(89) 0xde44 MOV %R9,%RCX |
(89) 0xde47 AND $-0x8,%RCX |
(89) 0xde4b CMP %R9,%RCX |
(89) 0xde4e JAE dfa0 |
(89) 0xde54 ADD %RCX,%RAX |
(89) 0xde57 MOV 0x28(%RBP),%R9 |
(89) 0xde5b MOV -0x30(%RBP),%R11 |
(89) 0xde5f JMP de7c |
(92) 0xde70 INC %RAX |
(92) 0xde73 CMP %RAX,%R13 |
(92) 0xde76 JE dfa4 |
(92) 0xde7c MOV (%R9,%RAX,8),%RCX |
(92) 0xde80 CMP %R8,(%R14,%RCX,8) |
(92) 0xde84 JGE de70 |
(92) 0xde86 MOV %RBX,(%R14,%RCX,8) |
(92) 0xde8a INC %RBX |
(92) 0xde8d JMP de70 |
(89) 0xde90 MOV %R9,%RCX |
(89) 0xde93 SHR $0x3,%RCX |
(89) 0xde97 MOV -0x70(%RBP),%R11 |
(89) 0xde9b LEA (%R11,%RAX,8),%R11 |
(89) 0xde9f JMP deb9 |
(93) 0xdeb0 ADD $0x40,%R11 |
(93) 0xdeb4 DEC %RCX |
(93) 0xdeb7 JE de44 |
(93) 0xdeb9 MOV -0x38(%R11),%R12 |
(93) 0xdebd CMP %R8,(%R14,%R12,8) |
(93) 0xdec1 JGE df30 |
(93) 0xdec3 MOV %RBX,(%R14,%R12,8) |
(93) 0xdec7 INC %RBX |
(93) 0xdeca MOV -0x30(%R11),%R12 |
(93) 0xdece CMP %R8,(%R14,%R12,8) |
(93) 0xded2 JL df3a |
(93) 0xded4 MOV -0x28(%R11),%R12 |
(93) 0xded8 CMP %R8,(%R14,%R12,8) |
(93) 0xdedc JGE df4b |
(93) 0xdede MOV %RBX,(%R14,%R12,8) |
(93) 0xdee2 INC %RBX |
(93) 0xdee5 MOV -0x20(%R11),%R12 |
(93) 0xdee9 CMP %R8,(%R14,%R12,8) |
(93) 0xdeed JL df55 |
(93) 0xdeef MOV -0x18(%R11),%R12 |
(93) 0xdef3 CMP %R8,(%R14,%R12,8) |
(93) 0xdef7 JGE df66 |
(93) 0xdef9 MOV %RBX,(%R14,%R12,8) |
(93) 0xdefd INC %RBX |
(93) 0xdf00 MOV -0x10(%R11),%R12 |
(93) 0xdf04 CMP %R8,(%R14,%R12,8) |
(93) 0xdf08 JL df70 |
(93) 0xdf0a MOV -0x8(%R11),%R12 |
(93) 0xdf0e CMP %R8,(%R14,%R12,8) |
(93) 0xdf12 JGE df81 |
(93) 0xdf14 MOV %RBX,(%R14,%R12,8) |
(93) 0xdf18 INC %RBX |
(93) 0xdf1b MOV (%R11),%R12 |
(93) 0xdf1e CMP %R8,(%R14,%R12,8) |
(93) 0xdf22 JGE deb0 |
(93) 0xdf24 JMP df8e |
(93) 0xdf30 MOV -0x30(%R11),%R12 |
(93) 0xdf34 CMP %R8,(%R14,%R12,8) |
(93) 0xdf38 JGE ded4 |
(93) 0xdf3a MOV %RBX,(%R14,%R12,8) |
(93) 0xdf3e INC %RBX |
(93) 0xdf41 MOV -0x28(%R11),%R12 |
(93) 0xdf45 CMP %R8,(%R14,%R12,8) |
(93) 0xdf49 JL dede |
(93) 0xdf4b MOV -0x20(%R11),%R12 |
(93) 0xdf4f CMP %R8,(%R14,%R12,8) |
(93) 0xdf53 JGE deef |
(93) 0xdf55 MOV %RBX,(%R14,%R12,8) |
(93) 0xdf59 INC %RBX |
(93) 0xdf5c MOV -0x18(%R11),%R12 |
(93) 0xdf60 CMP %R8,(%R14,%R12,8) |
(93) 0xdf64 JL def9 |
(93) 0xdf66 MOV -0x10(%R11),%R12 |
(93) 0xdf6a CMP %R8,(%R14,%R12,8) |
(93) 0xdf6e JGE df0a |
(93) 0xdf70 MOV %RBX,(%R14,%R12,8) |
(93) 0xdf74 INC %RBX |
(93) 0xdf77 MOV -0x8(%R11),%R12 |
(93) 0xdf7b CMP %R8,(%R14,%R12,8) |
(93) 0xdf7f JL df14 |
(93) 0xdf81 MOV (%R11),%R12 |
(93) 0xdf84 CMP %R8,(%R14,%R12,8) |
(93) 0xdf88 JGE deb0 |
(93) 0xdf8e MOV %RBX,(%R14,%R12,8) |
(93) 0xdf92 INC %RBX |
(93) 0xdf95 JMP deb0 |
(89) 0xdfa0 MOV -0x30(%RBP),%R11 |
(89) 0xdfa4 MOV 0x30(%RBP),%RCX |
(89) 0xdfa8 MOV (%RCX,%RDI,8),%RAX |
(89) 0xdfac MOV 0x8(%RCX,%RDI,8),%RCX |
(89) 0xdfb1 MOV %RCX,%RDI |
(89) 0xdfb4 SUB %RAX,%RDI |
(89) 0xdfb7 JLE de04 |
(89) 0xdfbd CMP $0x4,%RDI |
(89) 0xdfc1 JAE e010 |
(89) 0xdfc3 MOV %RDI,%R9 |
(89) 0xdfc6 AND $-0x4,%R9 |
(89) 0xdfca CMP %RDI,%R9 |
(89) 0xdfcd JAE de00 |
(89) 0xdfd3 ADD %R9,%RAX |
(89) 0xdfd6 MOV 0x38(%RBP),%R9 |
(89) 0xdfda MOV -0x30(%RBP),%R11 |
(89) 0xdfde JMP dfec |
(90) 0xdfe0 INC %RAX |
(90) 0xdfe3 CMP %RAX,%RCX |
(90) 0xdfe6 JE de04 |
(90) 0xdfec MOV (%R9,%RAX,8),%RDI |
(90) 0xdff0 MOV (%RSI,%RDI,8),%RDI |
(90) 0xdff4 ADD %R15,%RDI |
(90) 0xdff7 CMP %R10,(%R14,%RDI,8) |
(90) 0xdffb JGE dfe0 |
(90) 0xdffd MOV %RDX,(%R14,%RDI,8) |
(90) 0xe001 INC %RDX |
(90) 0xe004 JMP dfe0 |
(89) 0xe010 MOV %RDI,%R9 |
(89) 0xe013 SHR $0x2,%R9 |
(89) 0xe017 MOV -0xc8(%RBP),%R11 |
(89) 0xe01e LEA (%R11,%RAX,8),%R11 |
(89) 0xe022 JMP e039 |
(91) 0xe030 ADD $0x20,%R11 |
(91) 0xe034 DEC %R9 |
(91) 0xe037 JE dfc3 |
(91) 0xe039 MOV -0x18(%R11),%R12 |
(91) 0xe03d MOV (%RSI,%R12,8),%R13 |
(91) 0xe041 ADD %R15,%R13 |
(91) 0xe044 CMP %R10,(%R14,%R13,8) |
(91) 0xe048 JGE e051 |
(91) 0xe04a MOV %RDX,(%R14,%R13,8) |
(91) 0xe04e INC %RDX |
(91) 0xe051 MOV -0x10(%R11),%R12 |
(91) 0xe055 MOV (%RSI,%R12,8),%R13 |
(91) 0xe059 ADD %R15,%R13 |
(91) 0xe05c CMP %R10,(%R14,%R13,8) |
(91) 0xe060 JGE e069 |
(91) 0xe062 MOV %RDX,(%R14,%R13,8) |
(91) 0xe066 INC %RDX |
(91) 0xe069 MOV -0x8(%R11),%R12 |
(91) 0xe06d MOV (%RSI,%R12,8),%R13 |
(91) 0xe071 ADD %R15,%R13 |
(91) 0xe074 CMP %R10,(%R14,%R13,8) |
(91) 0xe078 JGE e081 |
(91) 0xe07a MOV %RDX,(%R14,%R13,8) |
(91) 0xe07e INC %RDX |
(91) 0xe081 MOV (%R11),%R12 |
(91) 0xe084 MOV (%RSI,%R12,8),%R13 |
(91) 0xe088 ADD %R15,%R13 |
(91) 0xe08b CMP %R10,(%R14,%R13,8) |
(91) 0xe08f JGE e030 |
(91) 0xe091 MOV %RDX,(%R14,%R13,8) |
(91) 0xe095 INC %RDX |
(91) 0xe098 JMP e030 |
0xe09a XOR %R12D,%R12D |
0xe09d JMP e0b6 |
(86) 0xe0a0 MOV -0x30(%RBP),%R11 |
(86) 0xe0a4 LEA 0x1(%R12),%RAX |
(86) 0xe0a9 CMP -0x38(%RBP),%R12 |
(86) 0xe0ad MOV %RAX,%R12 |
(86) 0xe0b0 JE d980 |
(86) 0xe0b6 LEA (%R11,%R12,1),%RAX |
(86) 0xe0ba MOV -0x78(%RBP),%RCX |
(86) 0xe0be MOV (%RCX,%RAX,8),%RAX |
(86) 0xe0c2 MOV 0x20(%RBP),%RCX |
(86) 0xe0c6 MOV (%RCX,%RAX,8),%RDI |
(86) 0xe0ca MOV 0x8(%RCX,%RAX,8),%RAX |
(86) 0xe0cf MOV %RAX,%R9 |
(86) 0xe0d2 SUB %RDI,%R9 |
(86) 0xe0d5 JLE e0a4 |
(86) 0xe0d7 CMP $0x8,%R9 |
(86) 0xe0db JAE e120 |
(86) 0xe0dd MOV %R9,%RCX |
(86) 0xe0e0 AND $-0x8,%RCX |
(86) 0xe0e4 CMP %R9,%RCX |
(86) 0xe0e7 JAE e0a0 |
(86) 0xe0e9 ADD %RCX,%RDI |
(86) 0xe0ec MOV 0x28(%RBP),%R9 |
(86) 0xe0f0 MOV -0x30(%RBP),%R11 |
(86) 0xe0f4 JMP e108 |
(87) 0xe100 INC %RDI |
(87) 0xe103 CMP %RDI,%RAX |
(87) 0xe106 JE e0a4 |
(87) 0xe108 MOV (%R9,%RDI,8),%RCX |
(87) 0xe10c CMP %R8,(%R14,%RCX,8) |
(87) 0xe110 JGE e100 |
(87) 0xe112 MOV %RBX,(%R14,%RCX,8) |
(87) 0xe116 INC %RBX |
(87) 0xe119 JMP e100 |
(86) 0xe120 MOV %R9,%RCX |
(86) 0xe123 SHR $0x3,%RCX |
(86) 0xe127 MOV -0x70(%RBP),%R11 |
(86) 0xe12b LEA (%R11,%RDI,8),%R11 |
(86) 0xe12f JMP e149 |
(88) 0xe140 ADD $0x40,%R11 |
(88) 0xe144 DEC %RCX |
(88) 0xe147 JE e0dd |
(88) 0xe149 MOV -0x38(%R11),%R13 |
(88) 0xe14d CMP %R8,(%R14,%R13,8) |
(88) 0xe151 JGE e1c0 |
(88) 0xe153 MOV %RBX,(%R14,%R13,8) |
(88) 0xe157 INC %RBX |
(88) 0xe15a MOV -0x30(%R11),%R13 |
(88) 0xe15e CMP %R8,(%R14,%R13,8) |
(88) 0xe162 JL e1ca |
(88) 0xe164 MOV -0x28(%R11),%R13 |
(88) 0xe168 CMP %R8,(%R14,%R13,8) |
(88) 0xe16c JGE e1db |
(88) 0xe16e MOV %RBX,(%R14,%R13,8) |
(88) 0xe172 INC %RBX |
(88) 0xe175 MOV -0x20(%R11),%R13 |
(88) 0xe179 CMP %R8,(%R14,%R13,8) |
(88) 0xe17d JL e1e5 |
(88) 0xe17f MOV -0x18(%R11),%R13 |
(88) 0xe183 CMP %R8,(%R14,%R13,8) |
(88) 0xe187 JGE e1f6 |
(88) 0xe189 MOV %RBX,(%R14,%R13,8) |
(88) 0xe18d INC %RBX |
(88) 0xe190 MOV -0x10(%R11),%R13 |
(88) 0xe194 CMP %R8,(%R14,%R13,8) |
(88) 0xe198 JL e200 |
(88) 0xe19a MOV -0x8(%R11),%R13 |
(88) 0xe19e CMP %R8,(%R14,%R13,8) |
(88) 0xe1a2 JGE e211 |
(88) 0xe1a4 MOV %RBX,(%R14,%R13,8) |
(88) 0xe1a8 INC %RBX |
(88) 0xe1ab MOV (%R11),%R13 |
(88) 0xe1ae CMP %R8,(%R14,%R13,8) |
(88) 0xe1b2 JGE e140 |
(88) 0xe1b4 JMP e21e |
(88) 0xe1c0 MOV -0x30(%R11),%R13 |
(88) 0xe1c4 CMP %R8,(%R14,%R13,8) |
(88) 0xe1c8 JGE e164 |
(88) 0xe1ca MOV %RBX,(%R14,%R13,8) |
(88) 0xe1ce INC %RBX |
(88) 0xe1d1 MOV -0x28(%R11),%R13 |
(88) 0xe1d5 CMP %R8,(%R14,%R13,8) |
(88) 0xe1d9 JL e16e |
(88) 0xe1db MOV -0x20(%R11),%R13 |
(88) 0xe1df CMP %R8,(%R14,%R13,8) |
(88) 0xe1e3 JGE e17f |
(88) 0xe1e5 MOV %RBX,(%R14,%R13,8) |
(88) 0xe1e9 INC %RBX |
(88) 0xe1ec MOV -0x18(%R11),%R13 |
(88) 0xe1f0 CMP %R8,(%R14,%R13,8) |
(88) 0xe1f4 JL e189 |
(88) 0xe1f6 MOV -0x10(%R11),%R13 |
(88) 0xe1fa CMP %R8,(%R14,%R13,8) |
(88) 0xe1fe JGE e19a |
(88) 0xe200 MOV %RBX,(%R14,%R13,8) |
(88) 0xe204 INC %RBX |
(88) 0xe207 MOV -0x8(%R11),%R13 |
(88) 0xe20b CMP %R8,(%R14,%R13,8) |
(88) 0xe20f JL e1a4 |
(88) 0xe211 MOV (%R11),%R13 |
(88) 0xe214 CMP %R8,(%R14,%R13,8) |
(88) 0xe218 JGE e140 |
(88) 0xe21e MOV %RBX,(%R14,%R13,8) |
(88) 0xe222 INC %RBX |
(88) 0xe225 JMP e140 |
/home/eoseret/qaas_runs_CPU_9468/172-019-1763/intel/AMG/build/AMG/AMG/parcsr_mv/par_csr_matop.c: 109 - 242 |
-------------------------------------------------------------------------------- |
109: if (ii < rest) |
[...] |
127: for (i1 = ns; i1 < ne; i1++) |
[...] |
135: if ( allsquare ) { |
136: B_marker[i1] = jj_count_diag; |
137: jj_count_diag++; |
[...] |
144: if (num_cols_offd_A) |
145: { |
146: for (jj2 = A_offd_i[i1]; jj2 < A_offd_i[i1+1]; jj2++) |
147: { |
148: i2 = A_offd_j[jj2]; |
[...] |
154: for (jj3 = B_ext_offd_i[i2]; jj3 < B_ext_offd_i[i2+1]; jj3++) |
[...] |
164: if (B_marker[i3] < jj_row_begin_offd) |
165: { |
166: B_marker[i3] = jj_count_offd; |
167: jj_count_offd++; |
168: } |
169: } |
170: for (jj3 = B_ext_diag_i[i2]; jj3 < B_ext_diag_i[i2+1]; jj3++) |
171: { |
172: i3 = B_ext_diag_j[jj3]; |
173: |
174: if (B_marker[i3] < jj_row_begin_diag) |
175: { |
176: B_marker[i3] = jj_count_diag; |
177: jj_count_diag++; |
[...] |
187: for (jj2 = A_diag_i[i1]; jj2 < A_diag_i[i1+1]; jj2++) |
188: { |
189: i2 = A_diag_j[jj2]; |
[...] |
195: for (jj3 = B_diag_i[i2]; jj3 < B_diag_i[i2+1]; jj3++) |
196: { |
197: i3 = B_diag_j[jj3]; |
[...] |
205: if (B_marker[i3] < jj_row_begin_diag) |
206: { |
207: B_marker[i3] = jj_count_diag; |
208: jj_count_diag++; |
[...] |
216: if (num_cols_offd_B) |
217: { |
218: for (jj3 = B_offd_i[i2]; jj3 < B_offd_i[i2+1]; jj3++) |
219: { |
220: i3 = num_cols_diag_B+map_B_to_C[B_offd_j[jj3]]; |
[...] |
228: if (B_marker[i3] < jj_row_begin_offd) |
229: { |
230: B_marker[i3] = jj_count_offd; |
231: jj_count_offd++; |
[...] |
241: (*C_diag_i)[i1] = jj_row_begin_diag; |
242: (*C_offd_i)[i1] = jj_row_begin_offd; |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►100.00+ | __kmp_invoke_microtask | libiomp5.so | |
○ | __kmp_invoke_task_func | libiomp5.so |
Path / |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.00 |
CQA speedup if FP arith vectorized | 1.00 |
CQA speedup if fully vectorized | 5.27 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.07 |
Bottlenecks | micro-operation queue |
Function | hypre_ParMatmul_RowSizes.extracted |
Source | par_csr_matop.c:127-127,par_csr_matop.c:135-137,par_csr_matop.c:144-146,par_csr_matop.c:187-187,par_csr_matop.c:216-216,par_csr_matop.c:231-231,par_csr_matop.c:241-242 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 9.67 |
CQA cycles if no scalar integer | 9.67 |
CQA cycles if FP arith vectorized | 9.67 |
CQA cycles if fully vectorized | 1.83 |
Front-end cycles | 9.67 |
DIV/SQRT cycles | 5.25 |
P0 cycles | 5.25 |
P1 cycles | 5.25 |
P2 cycles | 5.25 |
P3 cycles | 5.00 |
P4 cycles | 9.00 |
P5 cycles | 9.00 |
P6 cycles | 9.00 |
P7 cycles | 0.00 |
P8 cycles | 0.00 |
P9 cycles | 0.00 |
P10 cycles | 0.00 |
P11 cycles | 0.00 |
P12 cycles | 0.00 |
P13 cycles | 0.00 |
Inter-iter dependencies cycles | NA |
FE+BE cycles (UFS) | NA |
Stall cycles (UFS) | NA |
Nb insns | 58.00 |
Nb uops | 58.00 |
Nb loads | 17.00 |
Nb stores | 10.00 |
Nb stack references | 14.00 |
FLOP/cycle | 0.00 |
Nb FLOP add-sub | 0.00 |
Nb FLOP mul | 0.00 |
Nb FLOP fma | 0.00 |
Nb FLOP div | 0.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 22.34 |
Bytes prefetched | 0.00 |
Bytes loaded | 136.00 |
Bytes stored | 80.00 |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | 0.00 |
Vectorization ratio load | 0.00 |
Vectorization ratio store | 0.00 |
Vectorization ratio mul | NA |
Vectorization ratio add_sub | 0.00 |
Vectorization ratio fma | NA |
Vectorization ratio div_sqrt | NA |
Vectorization ratio other | 0.00 |
Vector-efficiency ratio all | 12.23 |
Vector-efficiency ratio load | 12.50 |
Vector-efficiency ratio store | 12.50 |
Vector-efficiency ratio mul | NA |
Vector-efficiency ratio add_sub | 12.50 |
Vector-efficiency ratio fma | NA |
Vector-efficiency ratio div_sqrt | NA |
Vector-efficiency ratio other | 11.81 |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.00 |
CQA speedup if FP arith vectorized | 1.00 |
CQA speedup if fully vectorized | 5.27 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.07 |
Bottlenecks | micro-operation queue |
Function | hypre_ParMatmul_RowSizes.extracted |
Source | par_csr_matop.c:127-127,par_csr_matop.c:135-137,par_csr_matop.c:144-146,par_csr_matop.c:187-187,par_csr_matop.c:216-216,par_csr_matop.c:231-231,par_csr_matop.c:241-242 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 9.67 |
CQA cycles if no scalar integer | 9.67 |
CQA cycles if FP arith vectorized | 9.67 |
CQA cycles if fully vectorized | 1.83 |
Front-end cycles | 9.67 |
DIV/SQRT cycles | 5.25 |
P0 cycles | 5.25 |
P1 cycles | 5.25 |
P2 cycles | 5.25 |
P3 cycles | 5.00 |
P4 cycles | 9.00 |
P5 cycles | 9.00 |
P6 cycles | 9.00 |
P7 cycles | 0.00 |
P8 cycles | 0.00 |
P9 cycles | 0.00 |
P10 cycles | 0.00 |
P11 cycles | 0.00 |
P12 cycles | 0.00 |
P13 cycles | 0.00 |
Inter-iter dependencies cycles | NA |
FE+BE cycles (UFS) | NA |
Stall cycles (UFS) | NA |
Nb insns | 58.00 |
Nb uops | 58.00 |
Nb loads | 17.00 |
Nb stores | 10.00 |
Nb stack references | 14.00 |
FLOP/cycle | 0.00 |
Nb FLOP add-sub | 0.00 |
Nb FLOP mul | 0.00 |
Nb FLOP fma | 0.00 |
Nb FLOP div | 0.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 22.34 |
Bytes prefetched | 0.00 |
Bytes loaded | 136.00 |
Bytes stored | 80.00 |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | 0.00 |
Vectorization ratio load | 0.00 |
Vectorization ratio store | 0.00 |
Vectorization ratio mul | NA |
Vectorization ratio add_sub | 0.00 |
Vectorization ratio fma | NA |
Vectorization ratio div_sqrt | NA |
Vectorization ratio other | 0.00 |
Vector-efficiency ratio all | 12.23 |
Vector-efficiency ratio load | 12.50 |
Vector-efficiency ratio store | 12.50 |
Vector-efficiency ratio mul | NA |
Vector-efficiency ratio add_sub | 12.50 |
Vector-efficiency ratio fma | NA |
Vector-efficiency ratio div_sqrt | NA |
Vector-efficiency ratio other | 11.81 |
Path / |
Function | hypre_ParMatmul_RowSizes.extracted |
Source file and lines | par_csr_matop.c:109-242 |
Module | libparcsr_mv.so |
nb instructions | 58 |
nb uops | 58 |
loop length | 249 |
used x86 registers | 13 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 14 |
micro-operation queue | 9.67 cycles |
front end | 9.67 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 5.25 | 5.25 | 5.25 | 5.25 | 5.00 | 9.00 | 9.00 | 9.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
cycles | 5.25 | 5.25 | 5.25 | 5.25 | 5.00 | 9.00 | 9.00 | 9.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | NA |
Front-end | 9.67 |
Dispatch | 9.00 |
Overall L1 | 9.67 |
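Vectorization ratio all | 0% |
Vectorization ratio load | 0% |
Vectorization ratio store | 0% |
Vectorization ratio mul | NA (no mul vectorizable/vectorized instructions) |
Vectorization ratio add-sub | 0% |
Vectorization ratio fma | NA (no fma vectorizable/vectorized instructions) |
Vectorization ratio div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
Vectorization ratio other | 0% |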
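Vector-efficiency ratio all | 12% |
Vector-efficiency ratio load | 12% |
Vector-efficiency ratio store | 12% |
Vector-efficiency ratio mul | NA (no mul vectorizable/vectorized instructions) |
Vector-efficiency ratio add-sub | 12% |
Vector-efficiency ratio fma | NA (no fma vectorizable/vectorized instructions) |
Vector-efficiency ratio div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
Vector-efficiency ratio other | 11% |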
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput | Vectorization |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
MOV -0xa0(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV -0x68(%RBP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV %R8,(%RAX,%RCX,8) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV -0x98(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV %R10,(%RAX,%RCX,8) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV -0xc0(%RBP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
LEA 0x1(%RCX),%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV %RDX,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | scal (12.5%) |
MOV %RBX,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
CMP -0xa8(%RBP),%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
JE d7da <hypre_ParMatmul_RowSizes.extracted+0xca> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
CMPQ $0,0x78(%RBP) | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
MOV -0xb0(%RBP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
JE d9d2 <hypre_ParMatmul_RowSizes.extracted+0x2c2> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
LEA (%RCX,%RDI,1),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV %R8,(%R14,%RAX,8) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
LEA 0x1(%R8),%RBX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
ADD %RDI,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
CMPQ $0,0x70(%RBP) | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
MOV %RDI,-0xc0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV %RCX,-0x68(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
JE dda0 <hypre_ParMatmul_RowSizes.extracted+0x690> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV 0x10(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV (%RAX,%RCX,8),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
MOV 0x8(%RAX,%RCX,8),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
LEA 0x1(%RCX),%R9 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
CMP %RDX,%R13 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JGE dda4 <hypre_ParMatmul_RowSizes.extracted+0x694> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV %R9,-0x90(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV %R13,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
NOT %RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
ADD %RAX,%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
MOV %RDX,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
XOR %R9D,%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
MOV %R10,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | scal (12.5%) |
MOV %R13,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
JMP da46 <hypre_ParMatmul_RowSizes.extracted+0x336> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
LEA 0x1(%RCX),%R9 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV %R10,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | scal (12.5%) |
JMP ddbb <hypre_ParMatmul_RowSizes.extracted+0x6ab> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
MOV -0x68(%RBP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV -0x90(%RBP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV -0xb8(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV (%RAX,%RCX,8),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
MOV (%RAX,%R9,8),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
CMP %R9,%R11 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JGE d980 <hypre_ParMatmul_RowSizes.extracted+0x270> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV %R11,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
NOT %RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
ADD %RAX,%R9 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV %R9,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
CMPQ $0,0x88(%RBP) | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
MOV %R11,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
JE e09a <hypre_ParMatmul_RowSizes.extracted+0x98a> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
JMP de16 <hypre_ParMatmul_RowSizes.extracted+0x706> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
XOR %R12D,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | scal (6.3%) |
JMP e0b6 <hypre_ParMatmul_RowSizes.extracted+0x9a6> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
Function | hypre_ParMatmul_RowSizes.extracted |
Source file and lines | par_csr_matop.c:109-242 |
Module | libparcsr_mv.so |
nb instructions | 58 |
nb uops | 58 |
loop length | 249 |
used x86 registers | 13 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 14 |
micro-operation queue | 9.67 cycles |
front end | 9.67 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 5.25 | 5.25 | 5.25 | 5.25 | 5.00 | 9.00 | 9.00 | 9.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
cycles | 5.25 | 5.25 | 5.25 | 5.25 | 5.00 | 9.00 | 9.00 | 9.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
Cycles executing div or sqrt instructions | NA |
Front-end | 9.67 |
Dispatch | 9.00 |
Overall L1 | 9.67 |
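Vectorization ratio all | 0% |
Vectorization ratio load | 0% |
Vectorization ratio store | 0% |
Vectorization ratio mul | NA (no mul vectorizable/vectorized instructions) |
Vectorization ratio add-sub | 0% |
Vectorization ratio fma | NA (no fma vectorizable/vectorized instructions) |
Vectorization ratio div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
Vectorization ratio other | 0% |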
all | 12% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 11% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput | Vectorization |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
MOV -0xa0(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV -0x68(%RBP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV %R8,(%RAX,%RCX,8) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV -0x98(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV %R10,(%RAX,%RCX,8) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV -0xc0(%RBP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
LEA 0x1(%RCX),%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV %RDX,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | scal (12.5%) |
MOV %RBX,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
CMP -0xa8(%RBP),%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
JE d7da <hypre_ParMatmul_RowSizes.extracted+0xca> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
CMPQ $0,0x78(%RBP) | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
MOV -0xb0(%RBP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
JE d9d2 <hypre_ParMatmul_RowSizes.extracted+0x2c2> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
LEA (%RCX,%RDI,1),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV %R8,(%R14,%RAX,8) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
LEA 0x1(%R8),%RBX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
ADD %RDI,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
CMPQ $0,0x70(%RBP) | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (12.5%) |
MOV %RDI,-0xc0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV %RCX,-0x68(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
JE dda0 <hypre_ParMatmul_RowSizes.extracted+0x690> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV 0x10(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV (%RAX,%RCX,8),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
MOV 0x8(%RAX,%RCX,8),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
LEA 0x1(%RCX),%R9 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
CMP %RDX,%R13 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JGE dda4 <hypre_ParMatmul_RowSizes.extracted+0x694> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV %R9,-0x90(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV %R13,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
NOT %RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
ADD %RAX,%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
MOV %RDX,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
XOR %R9D,%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
MOV %R10,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | scal (12.5%) |
MOV %R13,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
JMP da46 <hypre_ParMatmul_RowSizes.extracted+0x336> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
LEA 0x1(%RCX),%R9 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV %R10,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | scal (12.5%) |
JMP ddbb <hypre_ParMatmul_RowSizes.extracted+0x6ab> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
MOV -0x68(%RBP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV -0x90(%RBP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV -0xb8(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV (%RAX,%RCX,8),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
MOV (%RAX,%R9,8),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
CMP %R9,%R11 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JGE d980 <hypre_ParMatmul_RowSizes.extracted+0x270> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV %R11,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
NOT %RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
ADD %RAX,%R9 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV %R9,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
CMPQ $0,0x88(%RBP) | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
MOV %R11,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
JE e09a <hypre_ParMatmul_RowSizes.extracted+0x98a> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
JMP de16 <hypre_ParMatmul_RowSizes.extracted+0x706> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
XOR %R12D,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | scal (6.3%) |
JMP e0b6 <hypre_ParMatmul_RowSizes.extracted+0x9a6> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |