Function: hypre_ParCSRRelaxThreads.extracted.57 | Module: exec | Source: ams.c:3662-3682 | Coverage: 47.05% |
---|
Function: hypre_ParCSRRelaxThreads.extracted.57 | Module: exec | Source: ams.c:3662-3682 | Coverage: 47.05% |
---|
/home/eoseret/qaas_runs_CPU_9468/172-019-1763/intel/AMG/build/AMG/AMG/parcsr_ls/ams.c: 3662 - 3682 |
-------------------------------------------------------------------------------- |
3662: #pragma omp parallel for private(i,ii,jj,res) HYPRE_SMP_SCHEDULE |
3663: #endif |
3664: for (i = 0; i < n; i++) |
3665: { |
3666: /*----------------------------------------------------------- |
3667: * If diagonal is nonzero, relax point i; otherwise, skip it. |
3668: *-----------------------------------------------------------*/ |
3669: if (A_diag_data[A_diag_i[i]] != zero) |
3670: { |
3671: res = f_data[i]; |
3672: for (jj = A_diag_i[i]; jj < A_diag_i[i+1]; jj++) |
3673: { |
3674: ii = A_diag_j[jj]; |
3675: res -= A_diag_data[jj] * Vtemp_data[ii]; |
3676: } |
3677: for (jj = A_offd_i[i]; jj < A_offd_i[i+1]; jj++) |
3678: { |
3679: ii = A_offd_j[jj]; |
3680: res -= A_offd_data[jj] * Vext_data[ii]; |
3681: } |
3682: u_data[i] += (relax_weight*res)/l1_norms[i]; |
0x520310 PUSH %RBP |
0x520311 MOV %RSP,%RBP |
0x520314 PUSH %R15 |
0x520316 PUSH %R14 |
0x520318 PUSH %R13 |
0x52031a PUSH %R12 |
0x52031c PUSH %RBX |
0x52031d SUB $0x68,%RSP |
0x520321 MOV %RCX,-0x50(%RBP) |
0x520325 MOV %RDX,-0x68(%RBP) |
0x520329 MOV %R9,%R12 |
0x52032c MOV %R8,%R15 |
0x52032f MOV 0x38(%RBP),%RAX |
0x520333 MOV 0x48(%RBP),%RBX |
0x520337 MOV 0x40(%RBP),%R14 |
0x52033b MOV %RAX,-0x58(%RBP) |
0x52033f MOV 0x30(%RBP),%RAX |
0x520343 MOV %RAX,-0x60(%RBP) |
0x520347 MOV 0x28(%RBP),%RAX |
0x52034b MOV %RAX,-0x40(%RBP) |
0x52034f MOV 0x20(%RBP),%RAX |
0x520353 MOV %RAX,-0x48(%RBP) |
0x520357 MOV 0x18(%RBP),%R13 |
0x52035b MOV 0x10(%RBP),%RAX |
0x52035f MOVL $0,-0x70(%RBP) |
0x520366 MOV (%RDI),%ESI |
0x520368 MOVQ $0,-0x78(%RBP) |
0x520370 MOVQ $0x1,-0x88(%RBP) |
0x52037b MOV %RAX,-0x38(%RBP) |
0x52037f SUB $0x8,%RSP |
0x520383 LEA -0x88(%RBP),%RAX |
0x52038a LEA -0x70(%RBP),%RCX |
0x52038e LEA -0x78(%RBP),%R8 |
0x520392 LEA 0x60(%RBP),%R9 |
0x520396 MOV $0x5d8b10,%EDI |
0x52039b MOV %ESI,-0x6c(%RBP) |
0x52039e MOV $0x22,%EDX |
0x5203a3 PUSH $0x1 |
0x5203a5 PUSH $0x1 |
0x5203a7 PUSH %RAX |
0x5203a8 CALL 410420 <__kmpc_for_static_init_8@plt> |
0x5203ad ADD $0x20,%RSP |
0x5203b1 MOV -0x78(%RBP),%R11 |
0x5203b5 MOV 0x60(%RBP),%RAX |
0x5203b9 CMP %R11,%RAX |
0x5203bc JB 520c4a |
0x5203c2 VMOVSD -0x50(%RBP),%XMM0 |
0x5203c7 SUB %R11,%RAX |
0x5203ca LEA 0x1(%RAX),%RAX |
0x5203ce MOV %RAX,-0x80(%RBP) |
0x5203d2 JNE 5204a0 |
0x5203d8 XOR %EDX,%EDX |
0x5203da TESTB $0x1,-0x80(%RBP) |
0x5203de JE 520c4a |
0x5203e4 ADD %RDX,%R11 |
0x5203e7 VPXOR %XMM5,%XMM5,%XMM5 |
0x5203eb MOV (%R12,%R11,8),%RCX |
0x5203ef VUCOMISD (%R15,%RCX,8),%XMM5 |
0x5203f5 JE 520c4a |
0x5203fb MOV -0x58(%RBP),%RAX |
0x5203ff MOV 0x8(%R12,%R11,8),%RSI |
0x520404 VMOVSD (%RAX,%R11,8),%XMM5 |
0x52040a SUB %RCX,%RSI |
0x52040d JLE 520ae4 |
0x520413 MOV %RSI,%RDX |
0x520416 AND $-0x8,%RDX |
0x52041a JE 520a48 |
0x520420 MOV -0x38(%RBP),%RAX |
0x520424 VPBROADCASTQ %R14,%ZMM6 |
0x52042a LEA (%R15,%RCX,8),%R8 |
0x52042e LEA -0x1(%RDX),%RDI |
0x520432 VXORPD %XMM7,%XMM7,%XMM7 |
0x520436 XOR %R10D,%R10D |
0x520439 LEA (%RAX,%RCX,8),%R9 |
0x52043d NOPL (%RAX) |
(4556) 0x520440 VMOVUPD (%R9,%R10,8),%ZMM8 |
(4556) 0x520447 VXORPD %XMM9,%XMM9,%XMM9 |
(4556) 0x52044c KXNORW %K0,%K0,%K1 |
(4556) 0x520450 VGATHERQPD (%R14,%ZMM8,8),%ZMM9{%K1} |
(4556) 0x520457 VFNMADD231PD (%R8,%R10,8),%ZMM9,%ZMM7 |
(4556) 0x52045e ADD $0x8,%R10 |
(4556) 0x520462 CMP %RDI,%R10 |
(4556) 0x520465 JBE 520440 |
0x520467 VEXTRACTF64X4 $0x1,%ZMM7,%YMM8 |
0x52046e VADDPD %ZMM8,%ZMM7,%ZMM7 |
0x520474 VEXTRACTF128 $0x1,%YMM7,%XMM8 |
0x52047a VADDPD %XMM7,%XMM8,%XMM7 |
0x52047e VPERMILPD $0x1,%XMM7,%XMM8 |
0x520484 VADDSD %XMM7,%XMM8,%XMM7 |
0x520488 VADDSD %XMM7,%XMM5,%XMM5 |
0x52048c CMP %RDX,%RSI |
0x52048f JE 520ae4 |
0x520495 VPBROADCASTQ %RSI,%ZMM7 |
0x52049b JMP 520a56 |
0x5204a0 VBROADCASTSD 0x7db56(%RIP),%ZMM9 |
0x5204aa VMOVDQU64 0x8ea4c(%RIP),%ZMM8 |
0x5204b4 VPBROADCASTQ %R14,%ZMM5 |
0x5204ba VPBROADCASTQ %RBX,%ZMM6 |
0x5204c0 AND $-0x2,%RAX |
0x5204c4 VXORPD %XMM7,%XMM7,%XMM7 |
0x5204c8 XOR %EDX,%EDX |
0x5204ca MOV %RAX,-0x50(%RBP) |
0x5204ce MOV %R11,-0x30(%RBP) |
0x5204d2 JMP 52059d |
(4557) 0x5204d7 VPBROADCASTQ %R10,%ZMM13 |
(4557) 0x5204dd XOR %R9D,%R9D |
(4557) 0x5204e0 MOV -0x40(%RBP),%RAX |
(4557) 0x5204e4 VPBROADCASTQ %R9,%ZMM14 |
(4557) 0x5204ea ADD %R9,%R8 |
(4557) 0x5204ed VPSUBQ %ZMM14,%ZMM13,%ZMM13 |
(4557) 0x5204f3 VPXOR %XMM14,%XMM14,%XMM14 |
(4557) 0x5204f8 VPCMPNLEUQ %ZMM8,%ZMM13,%K1 |
(4557) 0x5204ff KMOVQ %K1,%K2 |
(4557) 0x520504 VMOVDQU64 (%RAX,%R8,8),%ZMM13{%K1}{z} |
(4557) 0x52050b MOV -0x48(%RBP),%RAX |
(4557) 0x52050f VMOVUPD (%RAX,%R8,8),%ZMM15{%K1}{z} |
(4557) 0x520516 VMOVDQA64 %ZMM13,%ZMM2{%K1} |
(4557) 0x52051c VPSLLQ $0x3,%ZMM2,%ZMM13 |
(4557) 0x520523 VPADDQ %ZMM13,%ZMM6,%ZMM13 |
(4557) 0x520529 VGATHERQPD (,%ZMM13,1),%ZMM14{%K2} |
(4557) 0x520534 VMOVAPD %ZMM15,%ZMM11{%K1} |
(4557) 0x52053a VMOVAPD %ZMM14,%ZMM1{%K1} |
(4557) 0x520540 VXORPD %ZMM9,%ZMM1,%ZMM14 |
(4557) 0x520546 VMULPD %ZMM14,%ZMM15,%ZMM13{%K1}{z} |
(4557) 0x52054c VEXTRACTF64X4 $0x1,%ZMM13,%YMM14 |
(4557) 0x520553 VADDPD %ZMM14,%ZMM13,%ZMM13 |
(4557) 0x520559 VEXTRACTF128 $0x1,%YMM13,%XMM14 |
(4557) 0x52055f VADDPD %XMM14,%XMM13,%XMM13 |
(4557) 0x520564 VPERMILPD $0x1,%XMM13,%XMM14 |
(4557) 0x52056a VADDSD %XMM14,%XMM13,%XMM13 |
(4557) 0x52056f VADDSD %XMM13,%XMM12,%XMM12 |
(4557) 0x520574 MOV -0x68(%RBP),%RAX |
(4557) 0x520578 VMULSD %XMM0,%XMM12,%XMM12 |
(4557) 0x52057c VDIVSD (%RAX,%RDI,8),%XMM12,%XMM12 |
(4557) 0x520581 MOV -0x60(%RBP),%RAX |
(4557) 0x520585 VADDSD (%RAX,%RDI,8),%XMM12,%XMM12 |
(4557) 0x52058a VMOVSD %XMM12,(%RAX,%RDI,8) |
(4557) 0x52058f ADD $0x2,%RDX |
(4557) 0x520593 CMP -0x50(%RBP),%RDX |
(4557) 0x520597 JE 5203da |
(4557) 0x52059d LEA (%R11,%RDX,1),%RDI |
(4557) 0x5205a1 MOV (%R12,%RDI,8),%R8 |
(4557) 0x5205a5 VUCOMISD (%R15,%R8,8),%XMM7 |
(4557) 0x5205ab JE 520851 |
(4557) 0x5205b1 MOV -0x58(%RBP),%RAX |
(4557) 0x5205b5 MOV 0x8(%R12,%RDI,8),%R10 |
(4557) 0x5205ba VMOVSD (%RAX,%RDI,8),%XMM12 |
(4557) 0x5205bf SUB %R8,%R10 |
(4557) 0x5205c2 JLE 5206f2 |
(4557) 0x5205c8 MOV %R10,%R9 |
(4557) 0x5205cb AND $-0x8,%R9 |
(4557) 0x5205cf JE 52064d |
(4557) 0x5205d1 MOV -0x38(%RBP),%RAX |
(4557) 0x5205d5 LEA (%R15,%R8,8),%RCX |
(4557) 0x5205d9 LEA -0x1(%R9),%R11 |
(4557) 0x5205dd VXORPD %XMM13,%XMM13,%XMM13 |
(4557) 0x5205e2 LEA (%RAX,%R8,8),%RSI |
(4557) 0x5205e6 XOR %EAX,%EAX |
(4557) 0x5205e8 NOPL (%RAX,%RAX,1) |
(4561) 0x5205f0 VMOVUPD (%RSI,%RAX,8),%ZMM14 |
(4561) 0x5205f7 VXORPD %XMM15,%XMM15,%XMM15 |
(4561) 0x5205fc KXNORW %K0,%K0,%K1 |
(4561) 0x520600 VGATHERQPD (%R14,%ZMM14,8),%ZMM15{%K1} |
(4561) 0x520607 VFNMADD231PD (%RCX,%RAX,8),%ZMM15,%ZMM13 |
(4561) 0x52060e ADD $0x8,%RAX |
(4561) 0x520612 CMP %R11,%RAX |
(4561) 0x520615 JBE 5205f0 |
(4557) 0x520617 VEXTRACTF64X4 $0x1,%ZMM13,%YMM14 |
(4557) 0x52061e VADDPD %ZMM14,%ZMM13,%ZMM13 |
(4557) 0x520624 VEXTRACTF128 $0x1,%YMM13,%XMM14 |
(4557) 0x52062a VADDPD %XMM14,%XMM13,%XMM13 |
(4557) 0x52062f VPERMILPD $0x1,%XMM13,%XMM14 |
(4557) 0x520635 VADDSD %XMM14,%XMM13,%XMM13 |
(4557) 0x52063a VADDSD %XMM13,%XMM12,%XMM12 |
(4557) 0x52063f CMP %R9,%R10 |
(4557) 0x520642 JNE 520658 |
(4557) 0x520644 MOV -0x30(%RBP),%R11 |
(4557) 0x520648 JMP 5206f2 |
(4557) 0x52064d VPBROADCASTQ %R10,%ZMM13 |
(4557) 0x520653 XOR %R9D,%R9D |
(4557) 0x520656 JMP 520662 |
(4557) 0x520658 MOV -0x30(%RBP),%R11 |
(4557) 0x52065c VPBROADCASTQ %R10,%ZMM13 |
(4557) 0x520662 MOV -0x38(%RBP),%RAX |
(4557) 0x520666 VPBROADCASTQ %R9,%ZMM14 |
(4557) 0x52066c ADD %R9,%R8 |
(4557) 0x52066f VPSUBQ %ZMM14,%ZMM13,%ZMM13 |
(4557) 0x520675 VPXOR %XMM14,%XMM14,%XMM14 |
(4557) 0x52067a VPCMPNLEUQ %ZMM8,%ZMM13,%K1 |
(4557) 0x520681 VMOVUPD (%R15,%R8,8),%ZMM15{%K1}{z} |
(4557) 0x520688 KMOVQ %K1,%K2 |
(4557) 0x52068d VMOVDQU64 (%RAX,%R8,8),%ZMM13{%K1}{z} |
(4557) 0x520694 VMOVAPD %ZMM15,%ZMM10{%K1} |
(4557) 0x52069a VMOVDQA64 %ZMM13,%ZMM4{%K1} |
(4557) 0x5206a0 VPSLLQ $0x3,%ZMM4,%ZMM13 |
(4557) 0x5206a7 VPADDQ %ZMM13,%ZMM5,%ZMM13 |
(4557) 0x5206ad VGATHERQPD (,%ZMM13,1),%ZMM14{%K2} |
(4557) 0x5206b8 VMOVAPD %ZMM14,%ZMM3{%K1} |
(4557) 0x5206be VXORPD %ZMM9,%ZMM3,%ZMM14 |
(4557) 0x5206c4 VMULPD %ZMM14,%ZMM15,%ZMM13{%K1}{z} |
(4557) 0x5206ca VEXTRACTF64X4 $0x1,%ZMM13,%YMM14 |
(4557) 0x5206d1 VADDPD %ZMM14,%ZMM13,%ZMM13 |
(4557) 0x5206d7 VEXTRACTF128 $0x1,%YMM13,%XMM14 |
(4557) 0x5206dd VADDPD %XMM14,%XMM13,%XMM13 |
(4557) 0x5206e2 VPERMILPD $0x1,%XMM13,%XMM14 |
(4557) 0x5206e8 VADDSD %XMM14,%XMM13,%XMM13 |
(4557) 0x5206ed VADDSD %XMM13,%XMM12,%XMM12 |
(4557) 0x5206f2 MOV (%R13,%RDI,8),%R8 |
(4557) 0x5206f7 MOV 0x8(%R13,%RDI,8),%R10 |
(4557) 0x5206fc SUB %R8,%R10 |
(4557) 0x5206ff JLE 520836 |
(4557) 0x520705 MOV %R10,%R9 |
(4557) 0x520708 AND $-0x8,%R9 |
(4557) 0x52070c JE 52078d |
(4557) 0x520712 MOV -0x48(%RBP),%RAX |
(4557) 0x520716 LEA -0x1(%R9),%R11 |
(4557) 0x52071a VXORPD %XMM13,%XMM13,%XMM13 |
(4557) 0x52071f LEA (%RAX,%R8,8),%RCX |
(4557) 0x520723 MOV -0x40(%RBP),%RAX |
(4557) 0x520727 LEA (%RAX,%R8,8),%RSI |
(4557) 0x52072b XOR %EAX,%EAX |
(4557) 0x52072d NOPL (%RAX) |
(4560) 0x520730 VMOVUPD (%RSI,%RAX,8),%ZMM14 |
(4560) 0x520737 VXORPD %XMM15,%XMM15,%XMM15 |
(4560) 0x52073c KXNORW %K0,%K0,%K1 |
(4560) 0x520740 VGATHERQPD (%RBX,%ZMM14,8),%ZMM15{%K1} |
(4560) 0x520747 VFNMADD231PD (%RCX,%RAX,8),%ZMM15,%ZMM13 |
(4560) 0x52074e ADD $0x8,%RAX |
(4560) 0x520752 CMP %R11,%RAX |
(4560) 0x520755 JBE 520730 |
(4557) 0x520757 VEXTRACTF64X4 $0x1,%ZMM13,%YMM14 |
(4557) 0x52075e VADDPD %ZMM14,%ZMM13,%ZMM13 |
(4557) 0x520764 VEXTRACTF128 $0x1,%YMM13,%XMM14 |
(4557) 0x52076a VADDPD %XMM14,%XMM13,%XMM13 |
(4557) 0x52076f VPERMILPD $0x1,%XMM13,%XMM14 |
(4557) 0x520775 VADDSD %XMM14,%XMM13,%XMM13 |
(4557) 0x52077a VADDSD %XMM13,%XMM12,%XMM12 |
(4557) 0x52077f CMP %R9,%R10 |
(4557) 0x520782 JNE 520798 |
(4557) 0x520784 MOV -0x30(%RBP),%R11 |
(4557) 0x520788 JMP 520836 |
(4557) 0x52078d VPBROADCASTQ %R10,%ZMM13 |
(4557) 0x520793 XOR %R9D,%R9D |
(4557) 0x520796 JMP 5207a2 |
(4557) 0x520798 MOV -0x30(%RBP),%R11 |
(4557) 0x52079c VPBROADCASTQ %R10,%ZMM13 |
(4557) 0x5207a2 MOV -0x40(%RBP),%RAX |
(4557) 0x5207a6 VPBROADCASTQ %R9,%ZMM14 |
(4557) 0x5207ac ADD %R9,%R8 |
(4557) 0x5207af VPSUBQ %ZMM14,%ZMM13,%ZMM13 |
(4557) 0x5207b5 VPXOR %XMM14,%XMM14,%XMM14 |
(4557) 0x5207ba VPCMPNLEUQ %ZMM8,%ZMM13,%K1 |
(4557) 0x5207c1 KMOVQ %K1,%K2 |
(4557) 0x5207c6 VMOVDQU64 (%RAX,%R8,8),%ZMM13{%K1}{z} |
(4557) 0x5207cd MOV -0x48(%RBP),%RAX |
(4557) 0x5207d1 VMOVUPD (%RAX,%R8,8),%ZMM15{%K1}{z} |
(4557) 0x5207d8 VMOVDQA64 %ZMM13,%ZMM2{%K1} |
(4557) 0x5207de VPSLLQ $0x3,%ZMM2,%ZMM13 |
(4557) 0x5207e5 VPADDQ %ZMM13,%ZMM6,%ZMM13 |
(4557) 0x5207eb VGATHERQPD (,%ZMM13,1),%ZMM14{%K2} |
(4557) 0x5207f6 VMOVAPD %ZMM15,%ZMM11{%K1} |
(4557) 0x5207fc VMOVAPD %ZMM14,%ZMM1{%K1} |
(4557) 0x520802 VXORPD %ZMM9,%ZMM1,%ZMM14 |
(4557) 0x520808 VMULPD %ZMM14,%ZMM15,%ZMM13{%K1}{z} |
(4557) 0x52080e VEXTRACTF64X4 $0x1,%ZMM13,%YMM14 |
(4557) 0x520815 VADDPD %ZMM14,%ZMM13,%ZMM13 |
(4557) 0x52081b VEXTRACTF128 $0x1,%YMM13,%XMM14 |
(4557) 0x520821 VADDPD %XMM14,%XMM13,%XMM13 |
(4557) 0x520826 VPERMILPD $0x1,%XMM13,%XMM14 |
(4557) 0x52082c VADDSD %XMM14,%XMM13,%XMM13 |
(4557) 0x520831 VADDSD %XMM13,%XMM12,%XMM12 |
(4557) 0x520836 MOV -0x68(%RBP),%RAX |
(4557) 0x52083a VMULSD %XMM0,%XMM12,%XMM12 |
(4557) 0x52083e VDIVSD (%RAX,%RDI,8),%XMM12,%XMM12 |
(4557) 0x520843 MOV -0x60(%RBP),%RAX |
(4557) 0x520847 VADDSD (%RAX,%RDI,8),%XMM12,%XMM12 |
(4557) 0x52084c VMOVSD %XMM12,(%RAX,%RDI,8) |
(4557) 0x520851 MOV 0x8(%R12,%RDI,8),%R8 |
(4557) 0x520856 VUCOMISD (%R15,%R8,8),%XMM7 |
(4557) 0x52085c JE 52058f |
(4557) 0x520862 MOV -0x58(%RBP),%RAX |
(4557) 0x520866 LEA 0x1(%RDX,%R11,1),%RDI |
(4557) 0x52086b MOV 0x8(%R12,%RDI,8),%R10 |
(4557) 0x520870 VMOVSD (%RAX,%RDI,8),%XMM12 |
(4557) 0x520875 SUB %R8,%R10 |
(4557) 0x520878 JLE 5209a2 |
(4557) 0x52087e MOV %R10,%R9 |
(4557) 0x520881 AND $-0x8,%R9 |
(4557) 0x520885 JE 5208fd |
(4557) 0x520887 MOV -0x38(%RBP),%RAX |
(4557) 0x52088b LEA (%R15,%R8,8),%RCX |
(4557) 0x52088f LEA -0x1(%R9),%R11 |
(4557) 0x520893 VXORPD %XMM13,%XMM13,%XMM13 |
(4557) 0x520898 LEA (%RAX,%R8,8),%RSI |
(4557) 0x52089c XOR %EAX,%EAX |
(4557) 0x52089e XCHG %AX,%AX |
(4559) 0x5208a0 VMOVUPD (%RSI,%RAX,8),%ZMM14 |
(4559) 0x5208a7 VXORPD %XMM15,%XMM15,%XMM15 |
(4559) 0x5208ac KXNORW %K0,%K0,%K1 |
(4559) 0x5208b0 VGATHERQPD (%R14,%ZMM14,8),%ZMM15{%K1} |
(4559) 0x5208b7 VFNMADD231PD (%RCX,%RAX,8),%ZMM15,%ZMM13 |
(4559) 0x5208be ADD $0x8,%RAX |
(4559) 0x5208c2 CMP %R11,%RAX |
(4559) 0x5208c5 JBE 5208a0 |
(4557) 0x5208c7 VEXTRACTF64X4 $0x1,%ZMM13,%YMM14 |
(4557) 0x5208ce VADDPD %ZMM14,%ZMM13,%ZMM13 |
(4557) 0x5208d4 VEXTRACTF128 $0x1,%YMM13,%XMM14 |
(4557) 0x5208da VADDPD %XMM14,%XMM13,%XMM13 |
(4557) 0x5208df VPERMILPD $0x1,%XMM13,%XMM14 |
(4557) 0x5208e5 VADDSD %XMM14,%XMM13,%XMM13 |
(4557) 0x5208ea VADDSD %XMM13,%XMM12,%XMM12 |
(4557) 0x5208ef CMP %R9,%R10 |
(4557) 0x5208f2 JNE 520908 |
(4557) 0x5208f4 MOV -0x30(%RBP),%R11 |
(4557) 0x5208f8 JMP 5209a2 |
(4557) 0x5208fd VPBROADCASTQ %R10,%ZMM13 |
(4557) 0x520903 XOR %R9D,%R9D |
(4557) 0x520906 JMP 520912 |
(4557) 0x520908 MOV -0x30(%RBP),%R11 |
(4557) 0x52090c VPBROADCASTQ %R10,%ZMM13 |
(4557) 0x520912 MOV -0x38(%RBP),%RAX |
(4557) 0x520916 VPBROADCASTQ %R9,%ZMM14 |
(4557) 0x52091c ADD %R9,%R8 |
(4557) 0x52091f VPSUBQ %ZMM14,%ZMM13,%ZMM13 |
(4557) 0x520925 VPXOR %XMM14,%XMM14,%XMM14 |
(4557) 0x52092a VPCMPNLEUQ %ZMM8,%ZMM13,%K1 |
(4557) 0x520931 VMOVUPD (%R15,%R8,8),%ZMM15{%K1}{z} |
(4557) 0x520938 KMOVQ %K1,%K2 |
(4557) 0x52093d VMOVDQU64 (%RAX,%R8,8),%ZMM13{%K1}{z} |
(4557) 0x520944 VMOVAPD %ZMM15,%ZMM10{%K1} |
(4557) 0x52094a VMOVDQA64 %ZMM13,%ZMM4{%K1} |
(4557) 0x520950 VPSLLQ $0x3,%ZMM4,%ZMM13 |
(4557) 0x520957 VPADDQ %ZMM13,%ZMM5,%ZMM13 |
(4557) 0x52095d VGATHERQPD (,%ZMM13,1),%ZMM14{%K2} |
(4557) 0x520968 VMOVAPD %ZMM14,%ZMM3{%K1} |
(4557) 0x52096e VXORPD %ZMM9,%ZMM3,%ZMM14 |
(4557) 0x520974 VMULPD %ZMM14,%ZMM15,%ZMM13{%K1}{z} |
(4557) 0x52097a VEXTRACTF64X4 $0x1,%ZMM13,%YMM14 |
(4557) 0x520981 VADDPD %ZMM14,%ZMM13,%ZMM13 |
(4557) 0x520987 VEXTRACTF128 $0x1,%YMM13,%XMM14 |
(4557) 0x52098d VADDPD %XMM14,%XMM13,%XMM13 |
(4557) 0x520992 VPERMILPD $0x1,%XMM13,%XMM14 |
(4557) 0x520998 VADDSD %XMM14,%XMM13,%XMM13 |
(4557) 0x52099d VADDSD %XMM13,%XMM12,%XMM12 |
(4557) 0x5209a2 MOV (%R13,%RDI,8),%R8 |
(4557) 0x5209a7 MOV 0x8(%R13,%RDI,8),%R10 |
(4557) 0x5209ac SUB %R8,%R10 |
(4557) 0x5209af JLE 520574 |
(4557) 0x5209b5 MOV %R10,%R9 |
(4557) 0x5209b8 AND $-0x8,%R9 |
(4557) 0x5209bc JE 5204d7 |
(4557) 0x5209c2 MOV -0x48(%RBP),%RAX |
(4557) 0x5209c6 LEA -0x1(%R9),%R11 |
(4557) 0x5209ca VXORPD %XMM13,%XMM13,%XMM13 |
(4557) 0x5209cf LEA (%RAX,%R8,8),%RCX |
(4557) 0x5209d3 MOV -0x40(%RBP),%RAX |
(4557) 0x5209d7 LEA (%RAX,%R8,8),%RSI |
(4557) 0x5209db XOR %EAX,%EAX |
(4557) 0x5209dd NOPL (%RAX) |
(4558) 0x5209e0 VMOVUPD (%RSI,%RAX,8),%ZMM14 |
(4558) 0x5209e7 VXORPD %XMM15,%XMM15,%XMM15 |
(4558) 0x5209ec KXNORW %K0,%K0,%K1 |
(4558) 0x5209f0 VGATHERQPD (%RBX,%ZMM14,8),%ZMM15{%K1} |
(4558) 0x5209f7 VFNMADD231PD (%RCX,%RAX,8),%ZMM15,%ZMM13 |
(4558) 0x5209fe ADD $0x8,%RAX |
(4558) 0x520a02 CMP %R11,%RAX |
(4558) 0x520a05 JBE 5209e0 |
(4557) 0x520a07 VEXTRACTF64X4 $0x1,%ZMM13,%YMM14 |
(4557) 0x520a0e MOV -0x30(%RBP),%R11 |
(4557) 0x520a12 VADDPD %ZMM14,%ZMM13,%ZMM13 |
(4557) 0x520a18 VEXTRACTF128 $0x1,%YMM13,%XMM14 |
(4557) 0x520a1e VADDPD %XMM14,%XMM13,%XMM13 |
(4557) 0x520a23 VPERMILPD $0x1,%XMM13,%XMM14 |
(4557) 0x520a29 VADDSD %XMM14,%XMM13,%XMM13 |
(4557) 0x520a2e VADDSD %XMM13,%XMM12,%XMM12 |
(4557) 0x520a33 CMP %R9,%R10 |
(4557) 0x520a36 JNE 520a3d |
(4557) 0x520a38 JMP 520574 |
(4557) 0x520a3d VPBROADCASTQ %R10,%ZMM13 |
(4557) 0x520a43 JMP 5204e0 |
0x520a48 VPBROADCASTQ %RSI,%ZMM7 |
0x520a4e VPBROADCASTQ %R14,%ZMM6 |
0x520a54 XOR %EDX,%EDX |
0x520a56 VPBROADCASTQ %RDX,%ZMM8 |
0x520a5c MOV -0x38(%RBP),%RAX |
0x520a60 ADD %RDX,%RCX |
0x520a63 VPSUBQ %ZMM8,%ZMM7,%ZMM7 |
0x520a69 VPCMPNLEUQ 0x8e48c(%RIP),%ZMM7,%K1 |
0x520a74 VMOVDQU64 (%RAX,%RCX,8),%ZMM7{%K1}{z} |
0x520a7b KMOVQ %K1,%K2 |
0x520a80 VMOVDQA64 %ZMM7,%ZMM4{%K1} |
0x520a86 VPSLLQ $0x3,%ZMM4,%ZMM4 |
0x520a8d VPADDQ %ZMM4,%ZMM6,%ZMM4 |
0x520a93 VPXOR %XMM6,%XMM6,%XMM6 |
0x520a97 VGATHERQPD (,%ZMM4,1),%ZMM6{%K2} |
0x520aa2 VMOVUPD (%R15,%RCX,8),%ZMM4{%K1}{z} |
0x520aa9 VMOVAPD %ZMM6,%ZMM3{%K1} |
0x520aaf VXORPD 0x7d547(%RIP){1to8},%ZMM3,%ZMM3 |
0x520ab9 VMULPD %ZMM3,%ZMM4,%ZMM3{%K1}{z} |
0x520abf VEXTRACTF64X4 $0x1,%ZMM3,%YMM4 |
0x520ac6 VADDPD %ZMM4,%ZMM3,%ZMM3 |
0x520acc VEXTRACTF128 $0x1,%YMM3,%XMM4 |
0x520ad2 VADDPD %XMM4,%XMM3,%XMM3 |
0x520ad6 VPERMILPD $0x1,%XMM3,%XMM4 |
0x520adc VADDSD %XMM4,%XMM3,%XMM3 |
0x520ae0 VADDSD %XMM3,%XMM5,%XMM5 |
0x520ae4 MOV (%R13,%R11,8),%RCX |
0x520ae9 MOV 0x8(%R13,%R11,8),%RSI |
0x520aee SUB %RCX,%RSI |
0x520af1 JLE 520c2c |
0x520af7 MOV %RSI,%RDX |
0x520afa AND $-0x8,%RDX |
0x520afe JE 520b8c |
0x520b04 MOV -0x40(%RBP),%R9 |
0x520b08 MOV -0x48(%RBP),%RAX |
0x520b0c VPBROADCASTQ %RBX,%ZMM3 |
0x520b12 LEA -0x1(%RDX),%RDI |
0x520b16 VXORPD %XMM4,%XMM4,%XMM4 |
0x520b1a XOR %R10D,%R10D |
0x520b1d LEA (%RAX,%RCX,8),%R8 |
0x520b21 LEA (%R9,%RCX,8),%R9 |
0x520b25 NOPW %CS:(%RAX,%RAX,1) |
(4555) 0x520b30 VMOVUPD (%R9,%R10,8),%ZMM6 |
(4555) 0x520b37 KXNORW %K0,%K0,%K1 |
(4555) 0x520b3b VPXOR %XMM7,%XMM7,%XMM7 |
(4555) 0x520b3f VGATHERQPD (%RBX,%ZMM6,8),%ZMM7{%K1} |
(4555) 0x520b46 VFNMADD231PD (%R8,%R10,8),%ZMM7,%ZMM4 |
(4555) 0x520b4d ADD $0x8,%R10 |
(4555) 0x520b51 CMP %RDI,%R10 |
(4555) 0x520b54 JBE 520b30 |
0x520b56 VEXTRACTF64X4 $0x1,%ZMM4,%YMM6 |
0x520b5d VADDPD %ZMM6,%ZMM4,%ZMM4 |
0x520b63 VEXTRACTF128 $0x1,%YMM4,%XMM6 |
0x520b69 VADDPD %XMM6,%XMM4,%XMM4 |
0x520b6d VPERMILPD $0x1,%XMM4,%XMM6 |
0x520b73 VADDSD %XMM6,%XMM4,%XMM4 |
0x520b77 VADDSD %XMM4,%XMM5,%XMM5 |
0x520b7b CMP %RDX,%RSI |
0x520b7e JE 520c2c |
0x520b84 VPBROADCASTQ %RSI,%ZMM4 |
0x520b8a JMP 520b9a |
0x520b8c VPBROADCASTQ %RSI,%ZMM4 |
0x520b92 VPBROADCASTQ %RBX,%ZMM3 |
0x520b98 XOR %EDX,%EDX |
0x520b9a VPBROADCASTQ %RDX,%ZMM6 |
0x520ba0 MOV -0x40(%RBP),%RAX |
0x520ba4 ADD %RDX,%RCX |
0x520ba7 VPSUBQ %ZMM6,%ZMM4,%ZMM4 |
0x520bad VPCMPNLEUQ 0x8e348(%RIP),%ZMM4,%K1 |
0x520bb8 VMOVDQU64 (%RAX,%RCX,8),%ZMM4{%K1}{z} |
0x520bbf MOV -0x48(%RBP),%RAX |
0x520bc3 KMOVQ %K1,%K2 |
0x520bc8 VMOVDQA64 %ZMM4,%ZMM2{%K1} |
0x520bce VPSLLQ $0x3,%ZMM2,%ZMM2 |
0x520bd5 VPADDQ %ZMM2,%ZMM3,%ZMM2 |
0x520bdb VPXOR %XMM3,%XMM3,%XMM3 |
0x520bdf VGATHERQPD (,%ZMM2,1),%ZMM3{%K2} |
0x520bea VMOVUPD (%RAX,%RCX,8),%ZMM2{%K1}{z} |
0x520bf1 VMOVAPD %ZMM3,%ZMM1{%K1} |
0x520bf7 VXORPD 0x7d3ff(%RIP){1to8},%ZMM1,%ZMM1 |
0x520c01 VMULPD %ZMM1,%ZMM2,%ZMM1{%K1}{z} |
0x520c07 VEXTRACTF64X4 $0x1,%ZMM1,%YMM2 |
0x520c0e VADDPD %ZMM2,%ZMM1,%ZMM1 |
0x520c14 VEXTRACTF128 $0x1,%YMM1,%XMM2 |
0x520c1a VADDPD %XMM2,%XMM1,%XMM1 |
0x520c1e VPERMILPD $0x1,%XMM1,%XMM2 |
0x520c24 VADDSD %XMM2,%XMM1,%XMM1 |
0x520c28 VADDSD %XMM1,%XMM5,%XMM5 |
0x520c2c MOV -0x68(%RBP),%RAX |
0x520c30 VMULSD %XMM0,%XMM5,%XMM0 |
0x520c34 VDIVSD (%RAX,%R11,8),%XMM0,%XMM0 |
0x520c3a MOV -0x60(%RBP),%RAX |
0x520c3e VADDSD (%RAX,%R11,8),%XMM0,%XMM0 |
0x520c44 VMOVSD %XMM0,(%RAX,%R11,8) |
0x520c4a MOV -0x6c(%RBP),%ESI |
0x520c4d MOV $0x5d8b30,%EDI |
0x520c52 ADD $0x68,%RSP |
0x520c56 POP %RBX |
0x520c57 POP %R12 |
0x520c59 POP %R13 |
0x520c5b POP %R14 |
0x520c5d POP %R15 |
0x520c5f POP %RBP |
0x520c60 VZEROUPPER |
0x520c63 JMP 410170 |
0x520c68 NOPL (%RAX,%RAX,1) |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►99.97+ | __kmp_invoke_microtask | libiomp5.so | |
○ | __kmp_invoke_task_func | libiomp5.so |
Path / |
Source file and lines | ams.c:3662-3682 |
Module | exec |
nb instructions | 194 |
nb uops | 297 |
loop length | 930 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 9 |
used ymm registers | 7 |
used zmm registers | 9 |
nb stack references | 22 |
ADD-SUB / MUL ratio | 5.67 |
micro-operation queue | 49.50 cycles |
front end | 49.50 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 9.75 | 9.75 | 9.50 | 9.50 | 7.50 | 18.33 | 18.33 | 18.33 | 24.00 | 25.17 | 24.83 | 25.00 | 11.00 | 11.00 |
cycles | 9.75 | 9.75 | 9.50 | 9.50 | 7.50 | 20.67 | 20.67 | 20.67 | 26.00 | 25.17 | 25.33 | 28.50 | 11.00 | 11.00 |
Cycles executing div or sqrt instructions | 5.00 |
Front-end | 49.50 |
Dispatch | 28.50 |
DIV/SQRT | 5.00 |
Overall L1 | 49.50 |
all | 29% |
load | 41% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 66% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 35% |
all | 67% |
load | 50% |
store | 0% |
mul | 66% |
add-sub | 47% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 91% |
all | 47% |
load | 45% |
store | 0% |
mul | 66% |
add-sub | 52% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 60% |
all | 32% |
load | 47% |
store | 10% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 70% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 32% |
all | 44% |
load | 56% |
store | 12% |
mul | 70% |
add-sub | 36% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 12% |
other | 47% |
all | 37% |
load | 51% |
store | 10% |
mul | 70% |
add-sub | 45% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 12% |
other | 39% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput | Vectorization |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
SUB $0x68,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV %RCX,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV %RDX,-0x68(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV %R9,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
MOV %R8,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
MOV 0x38(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x48(%RBP),%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
MOV 0x40(%RBP),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
MOV %RAX,-0x58(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV 0x30(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV %RAX,-0x60(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV 0x28(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV %RAX,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV 0x20(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV %RAX,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV 0x18(%RBP),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x10(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOVL $0,-0x70(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (6.3%) |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (6.3%) |
MOVQ $0,-0x78(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (6.3%) |
MOVQ $0x1,-0x88(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (6.3%) |
MOV %RAX,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
SUB $0x8,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
LEA -0x88(%RBP),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
LEA -0x70(%RBP),%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
LEA -0x78(%RBP),%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
LEA 0x60(%RBP),%R9 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV $0x5d8b10,%EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV %ESI,-0x6c(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (6.3%) |
MOV $0x22,%EDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
CALL 410420 <__kmpc_for_static_init_8@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
ADD $0x20,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV -0x78(%RBP),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x60(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
CMP %R11,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JB 520c4a <hypre_ParCSRRelaxThreads.extracted.57+0x93a> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
VMOVSD -0x50(%RBP),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
SUB %R11,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
LEA 0x1(%RAX),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV %RAX,-0x80(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
JNE 5204a0 <hypre_ParCSRRelaxThreads.extracted.57+0x190> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
TESTB $0x1,-0x80(%RBP) | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (1.6%) |
JE 520c4a <hypre_ParCSRRelaxThreads.extracted.57+0x93a> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
ADD %RDX,%R11 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VPXOR %XMM5,%XMM5,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | vect (25.0%) |
MOV (%R12,%R11,8),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VUCOMISD (%R15,%RCX,8),%XMM5 | 2 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0.50 | 6 | 1 | scal (12.5%) |
JE 520c4a <hypre_ParCSRRelaxThreads.extracted.57+0x93a> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV -0x58(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x8(%R12,%R11,8),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
VMOVSD (%RAX,%R11,8),%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
SUB %RCX,%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JLE 520ae4 <hypre_ParCSRRelaxThreads.extracted.57+0x7d4> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV %RSI,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
AND $-0x8,%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
JE 520a48 <hypre_ParCSRRelaxThreads.extracted.57+0x738> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV -0x38(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VPBROADCASTQ %R14,%ZMM6 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | scal (12.5%) |
LEA (%R15,%RCX,8),%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
LEA -0x1(%RDX),%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VXORPD %XMM7,%XMM7,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
XOR %R10D,%R10D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | scal (6.3%) |
LEA (%RAX,%RCX,8),%R9 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
VEXTRACTF64X4 $0x1,%ZMM7,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (50.0%) |
VADDPD %ZMM8,%ZMM7,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VEXTRACTF128 $0x1,%YMM7,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | vect (25.0%) |
VADDPD %XMM7,%XMM8,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VPERMILPD $0x1,%XMM7,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | vect (25.0%) |
VADDSD %XMM7,%XMM8,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
VADDSD %XMM7,%XMM5,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
CMP %RDX,%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JE 520ae4 <hypre_ParCSRRelaxThreads.extracted.57+0x7d4> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
VPBROADCASTQ %RSI,%ZMM7 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | scal (12.5%) |
JMP 520a56 <hypre_ParCSRRelaxThreads.extracted.57+0x746> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
VBROADCASTSD 0x7db56(%RIP),%ZMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 5 | 1 | scal (12.5%) |
VMOVDQU64 0x8ea4c(%RIP),%ZMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VPBROADCASTQ %R14,%ZMM5 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | scal (12.5%) |
VPBROADCASTQ %RBX,%ZMM6 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | scal (12.5%) |
AND $-0x2,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VXORPD %XMM7,%XMM7,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
MOV %RAX,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV %R11,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
JMP 52059d <hypre_ParCSRRelaxThreads.extracted.57+0x28d> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
VPBROADCASTQ %RSI,%ZMM7 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | scal (12.5%) |
VPBROADCASTQ %R14,%ZMM6 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | scal (12.5%) |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
VPBROADCASTQ %RDX,%ZMM8 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | scal (12.5%) |
MOV -0x38(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
ADD %RDX,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VPSUBQ %ZMM8,%ZMM7,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (100.0%) |
VPCMPNLEUQ 0x8e48c(%RIP),%ZMM7,%K1 | vect (100.0%) | |||||||||||||||||
VMOVDQU64 (%RAX,%RCX,8),%ZMM7{%K1}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
KMOVQ %K1,%K2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 1 | 1 | N/A |
VMOVDQA64 %ZMM7,%ZMM4{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VPSLLQ $0x3,%ZMM4,%ZMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 1 | vect (100.0%) |
VPADDQ %ZMM4,%ZMM6,%ZMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (100.0%) |
VPXOR %XMM6,%XMM6,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | vect (25.0%) |
VGATHERQPD (,%ZMM4,1),%ZMM6{%K2} | 48 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.75 | 4.58 | 5.08 | 3.58 | 5 | 5 | 0-16 | 8.94 | vect (100.0%) |
VMOVUPD (%R15,%RCX,8),%ZMM4{%K1}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMOVAPD %ZMM6,%ZMM3{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VXORPD 0x7d547(%RIP){1to8},%ZMM3,%ZMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 1 | vect (100.0%) |
VMULPD %ZMM3,%ZMM4,%ZMM3{%K1}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VEXTRACTF64X4 $0x1,%ZMM3,%YMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (50.0%) |
VADDPD %ZMM4,%ZMM3,%ZMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VEXTRACTF128 $0x1,%YMM3,%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | vect (25.0%) |
VADDPD %XMM4,%XMM3,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VPERMILPD $0x1,%XMM3,%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | vect (25.0%) |
VADDSD %XMM4,%XMM3,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
VADDSD %XMM3,%XMM5,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
MOV (%R13,%R11,8),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x8(%R13,%R11,8),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
SUB %RCX,%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JLE 520c2c <hypre_ParCSRRelaxThreads.extracted.57+0x91c> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV %RSI,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
AND $-0x8,%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
JE 520b8c <hypre_ParCSRRelaxThreads.extracted.57+0x87c> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV -0x40(%RBP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV -0x48(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VPBROADCASTQ %RBX,%ZMM3 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | scal (12.5%) |
LEA -0x1(%RDX),%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VXORPD %XMM4,%XMM4,%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
XOR %R10D,%R10D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | scal (6.3%) |
LEA (%RAX,%RCX,8),%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
LEA (%R9,%RCX,8),%R9 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
VEXTRACTF64X4 $0x1,%ZMM4,%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (50.0%) |
VADDPD %ZMM6,%ZMM4,%ZMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VEXTRACTF128 $0x1,%YMM4,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | vect (25.0%) |
VADDPD %XMM6,%XMM4,%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VPERMILPD $0x1,%XMM4,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | vect (25.0%) |
VADDSD %XMM6,%XMM4,%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
VADDSD %XMM4,%XMM5,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
CMP %RDX,%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JE 520c2c <hypre_ParCSRRelaxThreads.extracted.57+0x91c> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
VPBROADCASTQ %RSI,%ZMM4 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | scal (12.5%) |
JMP 520b9a <hypre_ParCSRRelaxThreads.extracted.57+0x88a> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
VPBROADCASTQ %RSI,%ZMM4 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | scal (12.5%) |
VPBROADCASTQ %RBX,%ZMM3 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | scal (12.5%) |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
VPBROADCASTQ %RDX,%ZMM6 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | scal (12.5%) |
MOV -0x40(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
ADD %RDX,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VPSUBQ %ZMM6,%ZMM4,%ZMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (100.0%) |
VPCMPNLEUQ 0x8e348(%RIP),%ZMM4,%K1 | vect (100.0%) | |||||||||||||||||
VMOVDQU64 (%RAX,%RCX,8),%ZMM4{%K1}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
MOV -0x48(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
KMOVQ %K1,%K2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 1 | 1 | N/A |
VMOVDQA64 %ZMM4,%ZMM2{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VPSLLQ $0x3,%ZMM2,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 1 | vect (100.0%) |
VPADDQ %ZMM2,%ZMM3,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (100.0%) |
VPXOR %XMM3,%XMM3,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | vect (25.0%) |
VGATHERQPD (,%ZMM2,1),%ZMM3{%K2} | 48 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.75 | 4.58 | 5.08 | 3.58 | 5 | 5 | 0-16 | 8.94 | vect (100.0%) |
VMOVUPD (%RAX,%RCX,8),%ZMM2{%K1}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMOVAPD %ZMM3,%ZMM1{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VXORPD 0x7d3ff(%RIP){1to8},%ZMM1,%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 1 | vect (100.0%) |
VMULPD %ZMM1,%ZMM2,%ZMM1{%K1}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VEXTRACTF64X4 $0x1,%ZMM1,%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (50.0%) |
VADDPD %ZMM2,%ZMM1,%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VEXTRACTF128 $0x1,%YMM1,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | vect (25.0%) |
VADDPD %XMM2,%XMM1,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VPERMILPD $0x1,%XMM1,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | vect (25.0%) |
VADDSD %XMM2,%XMM1,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
VADDSD %XMM1,%XMM5,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
MOV -0x68(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VMULSD %XMM0,%XMM5,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
VDIVSD (%RAX,%R11,8),%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 13 | 5 | scal (12.5%) |
MOV -0x60(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VADDSD (%RAX,%R11,8),%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
VMOVSD %XMM0,(%RAX,%R11,8) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 | scal (12.5%) |
MOV -0x6c(%RBP),%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (6.3%) |
MOV $0x5d8b30,%EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
ADD $0x68,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | vect (25.0%) |
JMP 410170 <__kmpc_for_static_fini@plt> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
NOPL (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
Source file and lines | ams.c:3662-3682 |
Module | exec |
nb instructions | 194 |
nb uops | 297 |
loop length | 930 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 9 |
used ymm registers | 7 |
used zmm registers | 9 |
nb stack references | 22 |
ADD-SUB / MUL ratio | 5.67 |
micro-operation queue | 49.50 cycles |
front end | 49.50 cycles |
ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 9.75 | 9.75 | 9.50 | 9.50 | 7.50 | 18.33 | 18.33 | 18.33 | 24.00 | 25.17 | 24.83 | 25.00 | 11.00 | 11.00 |
cycles | 9.75 | 9.75 | 9.50 | 9.50 | 7.50 | 20.67 | 20.67 | 20.67 | 26.00 | 25.17 | 25.33 | 28.50 | 11.00 | 11.00 |
Cycles executing div or sqrt instructions | 5.00 |
Front-end | 49.50 |
Dispatch | 28.50 |
DIV/SQRT | 5.00 |
Overall L1 | 49.50 |
all | 29% |
load | 41% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 66% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 35% |
all | 67% |
load | 50% |
store | 0% |
mul | 66% |
add-sub | 47% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 91% |
all | 47% |
load | 45% |
store | 0% |
mul | 66% |
add-sub | 52% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 60% |
all | 32% |
load | 47% |
store | 10% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 70% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 32% |
all | 44% |
load | 56% |
store | 12% |
mul | 70% |
add-sub | 36% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 12% |
other | 47% |
all | 37% |
load | 51% |
store | 10% |
mul | 70% |
add-sub | 45% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 12% |
other | 39% |
Instruction | Nb FU | ALU0/BRU0 | ALU1 | ALU2 | ALU3 | BRU1 | AGU0 | AGU1 | AGU2 | FP0 | FP1 | FP2 | FP3 | FP4 | FP5 | Latency | Recip. throughput | Vectorization |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
SUB $0x68,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV %RCX,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV %RDX,-0x68(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV %R9,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
MOV %R8,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
MOV 0x38(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x48(%RBP),%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
MOV 0x40(%RBP),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
MOV %RAX,-0x58(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV 0x30(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV %RAX,-0x60(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV 0x28(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV %RAX,-0x40(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV 0x20(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV %RAX,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV 0x18(%RBP),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x10(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOVL $0,-0x70(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (6.3%) |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (6.3%) |
MOVQ $0,-0x78(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (6.3%) |
MOVQ $0x1,-0x88(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (6.3%) |
MOV %RAX,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
SUB $0x8,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
LEA -0x88(%RBP),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
LEA -0x70(%RBP),%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
LEA -0x78(%RBP),%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
LEA 0x60(%RBP),%R9 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV $0x5d8b10,%EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV %ESI,-0x6c(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (6.3%) |
MOV $0x22,%EDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | N/A |
CALL 410420 <__kmpc_for_static_init_8@plt> | 2 | 0.50 | 0 | 0 | 0 | 0.50 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | N/A |
ADD $0x20,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV -0x78(%RBP),%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x60(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
CMP %R11,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JB 520c4a <hypre_ParCSRRelaxThreads.extracted.57+0x93a> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
VMOVSD -0x50(%RBP),%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
SUB %R11,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
LEA 0x1(%RAX),%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
MOV %RAX,-0x80(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
JNE 5204a0 <hypre_ParCSRRelaxThreads.extracted.57+0x190> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
TESTB $0x1,-0x80(%RBP) | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | scal (1.6%) |
JE 520c4a <hypre_ParCSRRelaxThreads.extracted.57+0x93a> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
ADD %RDX,%R11 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VPXOR %XMM5,%XMM5,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | vect (25.0%) |
MOV (%R12,%R11,8),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VUCOMISD (%R15,%RCX,8),%XMM5 | 2 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0.50 | 6 | 1 | scal (12.5%) |
JE 520c4a <hypre_ParCSRRelaxThreads.extracted.57+0x93a> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV -0x58(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x8(%R12,%R11,8),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
VMOVSD (%RAX,%R11,8),%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 | scal (12.5%) |
SUB %RCX,%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JLE 520ae4 <hypre_ParCSRRelaxThreads.extracted.57+0x7d4> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV %RSI,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
AND $-0x8,%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
JE 520a48 <hypre_ParCSRRelaxThreads.extracted.57+0x738> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV -0x38(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VPBROADCASTQ %R14,%ZMM6 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | scal (12.5%) |
LEA (%R15,%RCX,8),%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
LEA -0x1(%RDX),%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VXORPD %XMM7,%XMM7,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
XOR %R10D,%R10D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | scal (6.3%) |
LEA (%RAX,%RCX,8),%R9 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
NOPL (%RAX) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
VEXTRACTF64X4 $0x1,%ZMM7,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (50.0%) |
VADDPD %ZMM8,%ZMM7,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VEXTRACTF128 $0x1,%YMM7,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | vect (25.0%) |
VADDPD %XMM7,%XMM8,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VPERMILPD $0x1,%XMM7,%XMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | vect (25.0%) |
VADDSD %XMM7,%XMM8,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
VADDSD %XMM7,%XMM5,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
CMP %RDX,%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JE 520ae4 <hypre_ParCSRRelaxThreads.extracted.57+0x7d4> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
VPBROADCASTQ %RSI,%ZMM7 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | scal (12.5%) |
JMP 520a56 <hypre_ParCSRRelaxThreads.extracted.57+0x746> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
VBROADCASTSD 0x7db56(%RIP),%ZMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 5 | 1 | scal (12.5%) |
VMOVDQU64 0x8ea4c(%RIP),%ZMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VPBROADCASTQ %R14,%ZMM5 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | scal (12.5%) |
VPBROADCASTQ %RBX,%ZMM6 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | scal (12.5%) |
AND $-0x2,%RAX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VXORPD %XMM7,%XMM7,%XMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
MOV %RAX,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
MOV %R11,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 | scal (12.5%) |
JMP 52059d <hypre_ParCSRRelaxThreads.extracted.57+0x28d> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
VPBROADCASTQ %RSI,%ZMM7 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | scal (12.5%) |
VPBROADCASTQ %R14,%ZMM6 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | scal (12.5%) |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
VPBROADCASTQ %RDX,%ZMM8 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | scal (12.5%) |
MOV -0x38(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
ADD %RDX,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VPSUBQ %ZMM8,%ZMM7,%ZMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (100.0%) |
VPCMPNLEUQ 0x8e48c(%RIP),%ZMM7,%K1 | vect (100.0%) | |||||||||||||||||
VMOVDQU64 (%RAX,%RCX,8),%ZMM7{%K1}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
KMOVQ %K1,%K2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 1 | 1 | N/A |
VMOVDQA64 %ZMM7,%ZMM4{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VPSLLQ $0x3,%ZMM4,%ZMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 1 | vect (100.0%) |
VPADDQ %ZMM4,%ZMM6,%ZMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (100.0%) |
VPXOR %XMM6,%XMM6,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | vect (25.0%) |
VGATHERQPD (,%ZMM4,1),%ZMM6{%K2} | 48 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.75 | 4.58 | 5.08 | 3.58 | 5 | 5 | 0-16 | 8.94 | vect (100.0%) |
VMOVUPD (%R15,%RCX,8),%ZMM4{%K1}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMOVAPD %ZMM6,%ZMM3{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VXORPD 0x7d547(%RIP){1to8},%ZMM3,%ZMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 1 | vect (100.0%) |
VMULPD %ZMM3,%ZMM4,%ZMM3{%K1}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VEXTRACTF64X4 $0x1,%ZMM3,%YMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (50.0%) |
VADDPD %ZMM4,%ZMM3,%ZMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VEXTRACTF128 $0x1,%YMM3,%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | vect (25.0%) |
VADDPD %XMM4,%XMM3,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VPERMILPD $0x1,%XMM3,%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | vect (25.0%) |
VADDSD %XMM4,%XMM3,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
VADDSD %XMM3,%XMM5,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
MOV (%R13,%R11,8),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV 0x8(%R13,%R11,8),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (12.5%) |
SUB %RCX,%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JLE 520c2c <hypre_ParCSRRelaxThreads.extracted.57+0x91c> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV %RSI,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | N/A |
AND $-0x8,%RDX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
JE 520b8c <hypre_ParCSRRelaxThreads.extracted.57+0x87c> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
MOV -0x40(%RBP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
MOV -0x48(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VPBROADCASTQ %RBX,%ZMM3 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | scal (12.5%) |
LEA -0x1(%RDX),%RDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VXORPD %XMM4,%XMM4,%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (25.0%) |
XOR %R10D,%R10D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | scal (6.3%) |
LEA (%RAX,%RCX,8),%R8 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
LEA (%R9,%RCX,8),%R9 | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
NOPW %CS:(%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
VEXTRACTF64X4 $0x1,%ZMM4,%YMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (50.0%) |
VADDPD %ZMM6,%ZMM4,%ZMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VEXTRACTF128 $0x1,%YMM4,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | vect (25.0%) |
VADDPD %XMM6,%XMM4,%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VPERMILPD $0x1,%XMM4,%XMM6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | vect (25.0%) |
VADDSD %XMM6,%XMM4,%XMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
VADDSD %XMM4,%XMM5,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
CMP %RDX,%RSI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | scal (12.5%) |
JE 520c2c <hypre_ParCSRRelaxThreads.extracted.57+0x91c> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50-1 | N/A |
VPBROADCASTQ %RSI,%ZMM4 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | scal (12.5%) |
JMP 520b9a <hypre_ParCSRRelaxThreads.extracted.57+0x88a> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
VPBROADCASTQ %RSI,%ZMM4 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | scal (12.5%) |
VPBROADCASTQ %RBX,%ZMM3 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | scal (12.5%) |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | N/A |
VPBROADCASTQ %RDX,%ZMM6 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | scal (12.5%) |
MOV -0x40(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
ADD %RDX,%RCX | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
VPSUBQ %ZMM6,%ZMM4,%ZMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (100.0%) |
VPCMPNLEUQ 0x8e348(%RIP),%ZMM4,%K1 | vect (100.0%) | |||||||||||||||||
VMOVDQU64 (%RAX,%RCX,8),%ZMM4{%K1}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
MOV -0x48(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
KMOVQ %K1,%K2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 1 | 1 | N/A |
VMOVDQA64 %ZMM4,%ZMM2{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VPSLLQ $0x3,%ZMM2,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 1 | 1 | vect (100.0%) |
VPADDQ %ZMM2,%ZMM3,%ZMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (100.0%) |
VPXOR %XMM3,%XMM3,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | vect (25.0%) |
VGATHERQPD (,%ZMM2,1),%ZMM3{%K2} | 48 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.75 | 4.58 | 5.08 | 3.58 | 5 | 5 | 0-16 | 8.94 | vect (100.0%) |
VMOVUPD (%RAX,%RCX,8),%ZMM2{%K1}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VMOVAPD %ZMM3,%ZMM1{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 | vect (100.0%) |
VXORPD 0x7d3ff(%RIP){1to8},%ZMM1,%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 1 | vect (100.0%) |
VMULPD %ZMM1,%ZMM2,%ZMM1{%K1}{z} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 1 | vect (100.0%) |
VEXTRACTF64X4 $0x1,%ZMM1,%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.50 | vect (50.0%) |
VADDPD %ZMM2,%ZMM1,%ZMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 1 | vect (100.0%) |
VEXTRACTF128 $0x1,%YMM1,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 1 | 0.25 | vect (25.0%) |
VADDPD %XMM2,%XMM1,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | vect (25.0%) |
VPERMILPD $0x1,%XMM1,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | vect (25.0%) |
VADDSD %XMM2,%XMM1,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
VADDSD %XMM1,%XMM5,%XMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
MOV -0x68(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VMULSD %XMM0,%XMM5,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
VDIVSD (%RAX,%R11,8),%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 13 | 5 | scal (12.5%) |
MOV -0x60(%RBP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | N/A |
VADDSD (%RAX,%R11,8),%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 3 | 0.50 | scal (12.5%) |
VMOVSD %XMM0,(%RAX,%R11,8) | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 1 | 1 | scal (12.5%) |
MOV -0x6c(%RBP),%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.33 | scal (6.3%) |
MOV $0x5d8b30,%EDI | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
ADD $0x68,%RSP | 1 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 | N/A |
POP %RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
POP %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
POP %R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
POP %R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
POP %R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
POP %RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 | N/A |
VZEROUPPER | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | vect (25.0%) |
JMP 410170 <__kmpc_for_static_fini@plt> | 1 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | N/A |
NOPL (%RAX,%RAX,1) | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.09 | N/A |
Name | Coverage (%) | Time (s) |
---|---|---|
▼hypre_ParCSRRelaxThreads.extracted.57– | 47.05 | 34.80 |
▼Loop 4557 - ams.c:3662-3682 - exec– | 24.52 | 17.09 |
○Loop 4559 - ams.c:3672-3675 - exec | 12.08 | 8.42 |
○Loop 4561 - ams.c:3672-3675 - exec | 10.40 | 7.25 |
○Loop 4558 - ams.c:3677-3680 - exec | 0.03 | 0.02 |
○Loop 4560 - ams.c:3677-3680 - exec | 0.03 | 0.02 |
○Loop 4556 - ams.c:3672-3675 - exec | 0.00 | 0.00 |
○Loop 4555 - ams.c:3677-3680 - exec | 0.00 | 0.00 |