Function: __svml_i64div8_z0 | Module: exec | Source: :0-0 | Coverage: 0.09% |
---|
Function: __svml_i64div8_z0 | Module: exec | Source: :0-0 | Coverage: 0.09% |
---|
*** This Panel is Intentionally Left Blank. *** It is due to a lack of debug symbols in the given object |
0x466310 ENDBR64 |
0x466314 VMOVDQU64 0x251a2(%RIP),%ZMM3 |
0x46631e VPCMPGTQ %ZMM1,%ZMM3,%K1 |
0x466324 VPSUBQ %ZMM1,%ZMM3,%ZMM1{%K1} |
0x46632a VMOVDQU64 0x252cc(%RIP),%ZMM5 |
0x466334 VMOVUPD 0x25202(%RIP),%ZMM2 |
0x46633e VMOVUPD 0x25338(%RIP),%ZMM6 |
0x466348 VMOVUPD 0x2522e(%RIP),%ZMM4 |
0x466352 VPCMPEQQ %ZMM3,%ZMM1,%K0 |
0x466358 KORTESTB %K0,%K0 |
0x46635c JE 466365 |
0x46635e MOV $0,%EAX |
0x466363 DIV %AL |
0x466365 VPANDQ %ZMM5,%ZMM1,%ZMM7 |
0x46636b VCVTUQQ2PD {rn-sae},%ZMM7,%ZMM7 |
0x466371 VPANDNQ %ZMM1,%ZMM5,%ZMM8 |
0x466377 VCVTUQQ2PD {rn-sae},%ZMM8,%ZMM8 |
0x46637d VCVTUQQ2PD {rn-sae},%ZMM1,%ZMM9 |
0x466383 VANDPD %ZMM9,%ZMM6,%ZMM6 |
0x466389 VSUBPD {rn-sae},%ZMM6,%ZMM8,%ZMM8 |
0x46638f VADDPD {rn-sae},%ZMM8,%ZMM7,%ZMM7 |
0x466395 VPCMPGTQ %ZMM0,%ZMM3,%K2 |
0x46639b VPSUBQ %ZMM0,%ZMM3,%ZMM0{%K2} |
0x4663a1 VCVTUQQ2PD {rn-sae},%ZMM0,%ZMM8 |
0x4663a7 VRCP14PD %ZMM9,%ZMM10 |
0x4663ad VFNMADD231PD {rn-sae},%ZMM9,%ZMM10,%ZMM2 |
0x4663b3 VFMADD132PD {rn-sae},%ZMM10,%ZMM10,%ZMM2 |
0x4663b9 VMULPD {rn-sae},%ZMM2,%ZMM8,%ZMM8 |
0x4663bf VRNDSCALEPD $0x3,{sae},%ZMM8,%ZMM8 |
0x4663c6 VANDPD %ZMM8,%ZMM4,%ZMM8 |
0x4663cc VPANDNQ %ZMM0,%ZMM5,%ZMM9 |
0x4663d2 VCVTUQQ2PD {rn-sae},%ZMM9,%ZMM9 |
0x4663d8 VFNMADD231PD {rn-sae},%ZMM8,%ZMM6,%ZMM9 |
0x4663de VPANDQ %ZMM5,%ZMM0,%ZMM5 |
0x4663e4 VCVTUQQ2PD {rn-sae},%ZMM5,%ZMM5 |
0x4663ea VFNMADD231PD {rn-sae},%ZMM8,%ZMM7,%ZMM5 |
0x4663f0 VADDPD {rn-sae},%ZMM5,%ZMM9,%ZMM5 |
0x4663f6 VMULPD {rn-sae},%ZMM2,%ZMM5,%ZMM9 |
0x4663fc VRNDSCALEPD $0x3,{sae},%ZMM9,%ZMM9 |
0x466403 VANDPD %ZMM9,%ZMM4,%ZMM9 |
0x466409 VFNMADD231PD {rn-sae},%ZMM9,%ZMM6,%ZMM5 |
0x46640f VFNMADD231PD {rn-sae},%ZMM9,%ZMM7,%ZMM5 |
0x466415 VMULPD {rn-sae},%ZMM2,%ZMM5,%ZMM10 |
0x46641b VRNDSCALEPD $0x3,{sae},%ZMM10,%ZMM10 |
0x466422 VANDPD %ZMM10,%ZMM4,%ZMM4 |
0x466428 VFNMADD231PD {rn-sae},%ZMM6,%ZMM4,%ZMM5 |
0x46642e VFNMADD231PD {rn-sae},%ZMM7,%ZMM4,%ZMM5 |
0x466434 VMULPD {rn-sae},%ZMM2,%ZMM5,%ZMM2 |
0x46643a VCVTPD2UQQ {rz-sae},%ZMM4,%ZMM4 |
0x466440 VCVTPD2UQQ {rz-sae},%ZMM9,%ZMM5 |
0x466446 VCVTPD2UQQ {rz-sae},%ZMM8,%ZMM6 |
0x46644c VPADDQ %ZMM5,%ZMM6,%ZMM5 |
0x466452 VCVTPD2UQQ {rz-sae},%ZMM2,%ZMM2 |
0x466458 VPADDD %ZMM4,%ZMM2,%ZMM2 |
0x46645e VPADDQ %ZMM2,%ZMM5,%ZMM2 |
0x466464 VPMULLQ %ZMM1,%ZMM2,%ZMM4 |
0x46646a VPSUBQ %ZMM4,%ZMM0,%ZMM0 |
0x466470 VPCMPNLTUQ %ZMM1,%ZMM0,%K3 |
0x466477 VPADDQ 0x2513f(%RIP),%ZMM2,%ZMM2{%K3} |
0x466481 KXORB %K1,%K2,%K1 |
0x466485 VPSUBQ %ZMM2,%ZMM3,%ZMM2{%K1} |
0x46648b VMOVDQA64 %ZMM2,%ZMM0 |
0x466491 RET |
0x466492 NOPW %CS:(%RAX,%RAX,1) |
0x46649c NOPL (%RAX) |
Path / |
Source file and lines | |
Module | exec |
nb instructions | 62 |
nb uops | 70.50 |
loop length | 382.50 |
used x86 registers | 0.50 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 11 |
nb stack references | 0 |
ADD-SUB / MUL ratio | 0.75 |
micro-operation queue | 11.75 cycles |
front end | 11.75 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 29.00 | 1.50 | 2.33 | 2.33 | 0.00 | 29.00 | 2.00 | 0.00 | 0.00 | 0.00 | 0.50 | 2.33 |
cycles | 29.00 | 7.50 | 2.33 | 2.33 | 0.00 | 29.00 | 2.00 | 0.00 | 0.00 | 0.00 | 0.50 | 2.33 |
Cycles executing div or sqrt instructions | 3.00 |
FE+BE cycles | 105.50-105.45 |
Stall cycles | 93.54-93.49 |
RS full (events) | 104.97-104.92 |
Front-end | 11.75 |
Dispatch | 29.00 |
DIV/SQRT | 3.00 |
Overall L1 | 29.00 |
all | 98% |
load | 100% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 100% |
add-sub | 100% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 96% |
all | 100% |
load | 100% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 100% |
add-sub | 100% |
fma | 100% |
div/sqrt | 100% |
other | 100% |
all | 99% |
load | 100% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 100% |
add-sub | 100% |
fma | 100% |
div/sqrt | 100% |
other | 98% |
all | 98% |
load | 100% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 100% |
add-sub | 100% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 97% |
all | 100% |
load | 100% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 100% |
add-sub | 100% |
fma | 100% |
div/sqrt | 100% |
other | 100% |
all | 99% |
load | 100% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 100% |
add-sub | 100% |
fma | 100% |
div/sqrt | 100% |
other | 98% |
Source file and lines | |
Module | exec |
nb instructions | 63 |
nb uops | 73 |
loop length | 386 |
used x86 registers | 1 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 11 |
nb stack references | 0 |
ADD-SUB / MUL ratio | 0.75 |
micro-operation queue | 12.17 cycles |
front end | 12.17 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 29.00 | 3.00 | 2.33 | 2.33 | 0.00 | 29.00 | 2.00 | 0.00 | 0.00 | 0.00 | 1.00 | 2.33 |
cycles | 29.00 | 7.50 | 2.33 | 2.33 | 0.00 | 29.00 | 2.00 | 0.00 | 0.00 | 0.00 | 1.00 | 2.33 |
Cycles executing div or sqrt instructions | 6.00 |
FE+BE cycles | 105.40-105.34 |
Stall cycles | 93.09-93.03 |
RS full (events) | 104.86-104.80 |
Front-end | 12.17 |
Dispatch | 29.00 |
DIV/SQRT | 6.00 |
Overall L1 | 29.00 |
all | 96% |
load | 100% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 100% |
add-sub | 100% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 93% |
all | 100% |
load | 100% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 100% |
add-sub | 100% |
fma | 100% |
div/sqrt | 100% |
other | 100% |
all | 98% |
load | 100% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 100% |
add-sub | 100% |
fma | 100% |
div/sqrt | 100% |
other | 96% |
all | 96% |
load | 100% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 100% |
add-sub | 100% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 94% |
all | 100% |
load | 100% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 100% |
add-sub | 100% |
fma | 100% |
div/sqrt | 100% |
other | 100% |
all | 98% |
load | 100% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 100% |
add-sub | 100% |
fma | 100% |
div/sqrt | 100% |
other | 96% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
ENDBR64 | |||||||||||||||
VMOVDQU64 0x251a2(%RIP),%ZMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.50 |
VPCMPGTQ %ZMM1,%ZMM3,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0-3 | 1 |
VPSUBQ %ZMM1,%ZMM3,%ZMM1{%K1} | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.50 |
VMOVDQU64 0x252cc(%RIP),%ZMM5 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.50 |
VMOVUPD 0x25202(%RIP),%ZMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.50 |
VMOVUPD 0x25338(%RIP),%ZMM6 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.50 |
VMOVUPD 0x2522e(%RIP),%ZMM4 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.50 |
VPCMPEQQ %ZMM3,%ZMM1,%K0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
KORTESTB %K0,%K0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
JE 466365 <__svml_i64div8_z0+0x55> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV $0,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
DIV %AL | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
VPANDQ %ZMM5,%ZMM1,%ZMM7 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VCVTUQQ2PD {rn-sae},%ZMM7,%ZMM7 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VPANDNQ %ZMM1,%ZMM5,%ZMM8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VCVTUQQ2PD {rn-sae},%ZMM8,%ZMM8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VCVTUQQ2PD {rn-sae},%ZMM1,%ZMM9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VANDPD %ZMM9,%ZMM6,%ZMM6 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VSUBPD {rn-sae},%ZMM6,%ZMM8,%ZMM8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VADDPD {rn-sae},%ZMM8,%ZMM7,%ZMM7 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VPCMPGTQ %ZMM0,%ZMM3,%K2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0-3 | 1 |
VPSUBQ %ZMM0,%ZMM3,%ZMM0{%K2} | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.50 |
VCVTUQQ2PD {rn-sae},%ZMM0,%ZMM8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VRCP14PD %ZMM9,%ZMM10 | 3 | 2.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 7 | 2 |
VFNMADD231PD {rn-sae},%ZMM9,%ZMM10,%ZMM2 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD {rn-sae},%ZMM10,%ZMM10,%ZMM2 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD {rn-sae},%ZMM2,%ZMM8,%ZMM8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VRNDSCALEPD $0x3,{sae},%ZMM8,%ZMM8 | 2 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 1 |
VANDPD %ZMM8,%ZMM4,%ZMM8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPANDNQ %ZMM0,%ZMM5,%ZMM9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VCVTUQQ2PD {rn-sae},%ZMM9,%ZMM9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFNMADD231PD {rn-sae},%ZMM8,%ZMM6,%ZMM9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VPANDQ %ZMM5,%ZMM0,%ZMM5 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VCVTUQQ2PD {rn-sae},%ZMM5,%ZMM5 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFNMADD231PD {rn-sae},%ZMM8,%ZMM7,%ZMM5 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPD {rn-sae},%ZMM5,%ZMM9,%ZMM5 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMULPD {rn-sae},%ZMM2,%ZMM5,%ZMM9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VRNDSCALEPD $0x3,{sae},%ZMM9,%ZMM9 | 2 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 1 |
VANDPD %ZMM9,%ZMM4,%ZMM9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VFNMADD231PD {rn-sae},%ZMM9,%ZMM6,%ZMM5 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFNMADD231PD {rn-sae},%ZMM9,%ZMM7,%ZMM5 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD {rn-sae},%ZMM2,%ZMM5,%ZMM10 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VRNDSCALEPD $0x3,{sae},%ZMM10,%ZMM10 | 2 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 1 |
VANDPD %ZMM10,%ZMM4,%ZMM4 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VFNMADD231PD {rn-sae},%ZMM6,%ZMM4,%ZMM5 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFNMADD231PD {rn-sae},%ZMM7,%ZMM4,%ZMM5 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD {rn-sae},%ZMM2,%ZMM5,%ZMM2 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VCVTPD2UQQ {rz-sae},%ZMM4,%ZMM4 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VCVTPD2UQQ {rz-sae},%ZMM9,%ZMM5 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VCVTPD2UQQ {rz-sae},%ZMM8,%ZMM6 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VPADDQ %ZMM5,%ZMM6,%ZMM5 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VCVTPD2UQQ {rz-sae},%ZMM2,%ZMM2 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VPADDD %ZMM4,%ZMM2,%ZMM2 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPADDQ %ZMM2,%ZMM5,%ZMM2 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPMULLQ %ZMM1,%ZMM2,%ZMM4 | 5 | 1.50 | 0 | 0 | 0 | 0 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VPSUBQ %ZMM4,%ZMM0,%ZMM0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.50 |
VPCMPNLTUQ %ZMM1,%ZMM0,%K3 | |||||||||||||||
VPADDQ 0x2513f(%RIP),%ZMM2,%ZMM2{%K3} | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.67 |
KXORB %K1,%K2,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VPSUBQ %ZMM2,%ZMM3,%ZMM2{%K1} | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.50 |
VMOVDQA64 %ZMM2,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
Source file and lines | |
Module | exec |
nb instructions | 61 |
nb uops | 68 |
loop length | 379 |
used x86 registers | 0 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 11 |
nb stack references | 0 |
ADD-SUB / MUL ratio | 0.75 |
micro-operation queue | 11.33 cycles |
front end | 11.33 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 29.00 | 0.00 | 2.33 | 2.33 | 0.00 | 29.00 | 2.00 | 0.00 | 0.00 | 0.00 | 0.00 | 2.33 |
cycles | 29.00 | 7.50 | 2.33 | 2.33 | 0.00 | 29.00 | 2.00 | 0.00 | 0.00 | 0.00 | 0.00 | 2.33 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 105.61-105.57 |
Stall cycles | 94.00-93.96 |
RS full (events) | 105.09-105.05 |
Front-end | 11.33 |
Dispatch | 29.00 |
Overall L1 | 29.00 |
all | 100% |
load | 100% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 100% |
add-sub | 100% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 100% |
all | 100% |
load | 100% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 100% |
add-sub | 100% |
fma | 100% |
div/sqrt | 100% |
other | 100% |
all | 100% |
load | 100% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 100% |
add-sub | 100% |
fma | 100% |
div/sqrt | 100% |
other | 100% |
all | 100% |
load | 100% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 100% |
add-sub | 100% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 100% |
all | 100% |
load | 100% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 100% |
add-sub | 100% |
fma | 100% |
div/sqrt | 100% |
other | 100% |
all | 100% |
load | 100% |
store | NA (no store vectorizable/vectorized instructions) |
mul | 100% |
add-sub | 100% |
fma | 100% |
div/sqrt | 100% |
other | 100% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
ENDBR64 | |||||||||||||||
VMOVDQU64 0x251a2(%RIP),%ZMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.50 |
VPCMPGTQ %ZMM1,%ZMM3,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0-3 | 1 |
VPSUBQ %ZMM1,%ZMM3,%ZMM1{%K1} | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.50 |
VMOVDQU64 0x252cc(%RIP),%ZMM5 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.50 |
VMOVUPD 0x25202(%RIP),%ZMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.50 |
VMOVUPD 0x25338(%RIP),%ZMM6 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.50 |
VMOVUPD 0x2522e(%RIP),%ZMM4 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.50 |
VPCMPEQQ %ZMM3,%ZMM1,%K0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
KORTESTB %K0,%K0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
JE 466365 <__svml_i64div8_z0+0x55> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VPANDQ %ZMM5,%ZMM1,%ZMM7 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VCVTUQQ2PD {rn-sae},%ZMM7,%ZMM7 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VPANDNQ %ZMM1,%ZMM5,%ZMM8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VCVTUQQ2PD {rn-sae},%ZMM8,%ZMM8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VCVTUQQ2PD {rn-sae},%ZMM1,%ZMM9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VANDPD %ZMM9,%ZMM6,%ZMM6 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VSUBPD {rn-sae},%ZMM6,%ZMM8,%ZMM8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VADDPD {rn-sae},%ZMM8,%ZMM7,%ZMM7 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VPCMPGTQ %ZMM0,%ZMM3,%K2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0-3 | 1 |
VPSUBQ %ZMM0,%ZMM3,%ZMM0{%K2} | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.50 |
VCVTUQQ2PD {rn-sae},%ZMM0,%ZMM8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VRCP14PD %ZMM9,%ZMM10 | 3 | 2.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 7 | 2 |
VFNMADD231PD {rn-sae},%ZMM9,%ZMM10,%ZMM2 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFMADD132PD {rn-sae},%ZMM10,%ZMM10,%ZMM2 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD {rn-sae},%ZMM2,%ZMM8,%ZMM8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VRNDSCALEPD $0x3,{sae},%ZMM8,%ZMM8 | 2 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 1 |
VANDPD %ZMM8,%ZMM4,%ZMM8 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPANDNQ %ZMM0,%ZMM5,%ZMM9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VCVTUQQ2PD {rn-sae},%ZMM9,%ZMM9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFNMADD231PD {rn-sae},%ZMM8,%ZMM6,%ZMM9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VPANDQ %ZMM5,%ZMM0,%ZMM5 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VCVTUQQ2PD {rn-sae},%ZMM5,%ZMM5 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFNMADD231PD {rn-sae},%ZMM8,%ZMM7,%ZMM5 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VADDPD {rn-sae},%ZMM5,%ZMM9,%ZMM5 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VMULPD {rn-sae},%ZMM2,%ZMM5,%ZMM9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VRNDSCALEPD $0x3,{sae},%ZMM9,%ZMM9 | 2 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 1 |
VANDPD %ZMM9,%ZMM4,%ZMM9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VFNMADD231PD {rn-sae},%ZMM9,%ZMM6,%ZMM5 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFNMADD231PD {rn-sae},%ZMM9,%ZMM7,%ZMM5 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD {rn-sae},%ZMM2,%ZMM5,%ZMM10 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VRNDSCALEPD $0x3,{sae},%ZMM10,%ZMM10 | 2 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 1 |
VANDPD %ZMM10,%ZMM4,%ZMM4 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VFNMADD231PD {rn-sae},%ZMM6,%ZMM4,%ZMM5 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VFNMADD231PD {rn-sae},%ZMM7,%ZMM4,%ZMM5 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VMULPD {rn-sae},%ZMM2,%ZMM5,%ZMM2 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VCVTPD2UQQ {rz-sae},%ZMM4,%ZMM4 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VCVTPD2UQQ {rz-sae},%ZMM9,%ZMM5 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VCVTPD2UQQ {rz-sae},%ZMM8,%ZMM6 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VPADDQ %ZMM5,%ZMM6,%ZMM5 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VCVTPD2UQQ {rz-sae},%ZMM2,%ZMM2 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
VPADDD %ZMM4,%ZMM2,%ZMM2 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPADDQ %ZMM2,%ZMM5,%ZMM2 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPMULLQ %ZMM1,%ZMM2,%ZMM4 | 5 | 1.50 | 0 | 0 | 0 | 0 | 1.50 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | 1.50 |
VPSUBQ %ZMM4,%ZMM0,%ZMM0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.50 |
VPCMPNLTUQ %ZMM1,%ZMM0,%K3 | |||||||||||||||
VPADDQ 0x2513f(%RIP),%ZMM2,%ZMM2{%K3} | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.67 |
KXORB %K1,%K2,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
VPSUBQ %ZMM2,%ZMM3,%ZMM2{%K1} | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.50 |
VMOVDQA64 %ZMM2,%ZMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
Name | Coverage (%) | Time (s) |
---|---|---|
○__svml_i64div8_z0 | 0.09 | 0.07 |