Function: viscosity_kernel(int, int, int, int, clover::Buffer1D<double>&, clover::Buffer1D<double>&, ... | Module: exec | Source: viscosity.cpp:36-66 [...] | Coverage: 2.77% |
---|
Function: viscosity_kernel(int, int, int, int, clover::Buffer1D<double>&, clover::Buffer1D<double>&, ... | Module: exec | Source: viscosity.cpp:36-66 [...] | Coverage: 2.77% |
---|
/scratch_na/users/xoserete/qaas_runs/171-320-5323/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/viscosity.cpp: 36 - 66 |
-------------------------------------------------------------------------------- |
36: #pragma omp parallel for simd collapse(2) |
37: for (int j = (y_min + 1); j < (y_max + 2); j++) { |
38: for (int i = (x_min + 1); i < (x_max + 2); i++) { |
39: double ugrad = (xvel0(i + 1, j + 0) + xvel0(i + 1, j + 1)) - (xvel0(i, j) + xvel0(i + 0, j + 1)); |
40: double vgrad = (yvel0(i + 0, j + 1) + yvel0(i + 1, j + 1)) - (yvel0(i, j) + yvel0(i + 1, j + 0)); |
41: double div = (celldx[i] * (ugrad) + celldy[j] * (vgrad)); |
42: double strain2 = 0.5 * (xvel0(i + 0, j + 1) + xvel0(i + 1, j + 1) - xvel0(i, j) - xvel0(i + 1, j + 0)) / celldy[j] + |
43: 0.5 * (yvel0(i + 1, j + 0) + yvel0(i + 1, j + 1) - yvel0(i, j) - yvel0(i + 0, j + 1)) / celldx[i]; |
44: double pgradx = (pressure(i + 1, j + 0) - pressure(i - 1, j + 0)) / (celldx[i] + celldx[i + 1]); |
45: double pgrady = (pressure(i + 0, j + 1) - pressure(i + 0, j - 1)) / (celldy[j] + celldy[j + 2]); |
46: double pgradx2 = pgradx * pgradx; |
47: double pgrady2 = pgrady * pgrady; |
48: double limiter = ((0.5 * (ugrad) / celldx[i]) * pgradx2 + (0.5 * (vgrad) / celldy[j]) * pgrady2 + strain2 * pgradx * pgrady) / |
49: std::fmax(pgradx2 + pgrady2, g_small); |
50: if ((limiter > 0.0) || (div >= 0.0)) { |
51: viscosity(i, j) = 0.0; |
52: } else { |
53: double dirx = 1.0; |
54: if (pgradx < 0.0) dirx = -1.0; |
55: pgradx = dirx * std::fmax(g_small, std::fabs(pgradx)); |
56: double diry = 1.0; |
57: if (pgradx < 0.0) diry = -1.0; |
58: pgrady = diry * std::fmax(g_small, std::fabs(pgrady)); |
59: double pgrad = std::sqrt(pgradx * pgradx + pgrady * pgrady); |
60: double xgrad = std::fabs(celldx[i] * pgrad / pgradx); |
61: double ygrad = std::fabs(celldy[j] * pgrad / pgrady); |
62: double grad = std::fmin(xgrad, ygrad); |
63: double grad2 = grad * grad; |
64: viscosity(i, j) = 2.0 * density0(i, j) * grad2 * limiter * limiter; |
65: } |
66: } |
/scratch_na/users/xoserete/qaas_runs/171-320-5323/intel/CloverLeafCXX/build/CloverLeafCXX/src/omp/context.h: 46 - 69 |
-------------------------------------------------------------------------------- |
46: T &operator[](size_t i) const { return data[i]; } |
[...] |
69: T &operator()(size_t i, size_t j) const { return data[i + j * sizeX]; } |
0x4537a0 PUSH %RBP |
0x4537a1 MOV %RSP,%RBP |
0x4537a4 PUSH %R15 |
0x4537a6 PUSH %R14 |
0x4537a8 PUSH %R13 |
0x4537aa PUSH %R12 |
0x4537ac PUSH %RBX |
0x4537ad AND $-0x20,%RSP |
0x4537b1 SUB $0x160,%RSP |
0x4537b8 MOV %R8,0x28(%RSP) |
0x4537bd MOV %RDX,%R8 |
0x4537c0 MOV 0x48(%RBP),%RAX |
0x4537c4 MOV 0x38(%RBP),%R13 |
0x4537c8 MOV 0x30(%RBP),%RSI |
0x4537cc MOV 0x20(%RBP),%RBX |
0x4537d0 MOV 0x18(%RBP),%R14 |
0x4537d4 MOV 0x10(%RBP),%R12 |
0x4537d8 MOV 0x28(%RBP),%EDX |
0x4537db MOV %RDX,0x38(%RSP) |
0x4537e0 MOVL $0,0x34(%RSP) |
0x4537e8 TEST %RAX,%RAX |
0x4537eb JS 453df2 |
0x4537f1 MOV %R9,%R15 |
0x4537f4 MOV %R8,0x18(%RSP) |
0x4537f9 MOV %RCX,0x10(%RSP) |
0x4537fe MOV %RSI,0x20(%RSP) |
0x453803 MOV (%RDI),%ESI |
0x453805 MOVQ $0,0x68(%RSP) |
0x45380e MOV %RAX,0x60(%RSP) |
0x453813 MOVQ $0x1,0xb8(%RSP) |
0x45381f SUB $0x8,%RSP |
0x453823 LEA 0xc0(%RSP),%RAX |
0x45382b LEA 0x3c(%RSP),%RCX |
0x453830 LEA 0x70(%RSP),%R8 |
0x453835 LEA 0x68(%RSP),%R9 |
0x45383a MOV $0x6865b0,%EDI |
0x45383f MOV %ESI,0x38(%RSP) |
0x453843 MOV $0x22,%EDX |
0x453848 PUSH $0x1 |
0x45384a PUSH $0x1 |
0x45384c PUSH %RAX |
0x45384d CALL 403020 <__kmpc_for_static_init_8@plt> |
0x453852 ADD $0x20,%RSP |
0x453856 MOV 0x68(%RSP),%RSI |
0x45385b MOV 0x60(%RSP),%RCX |
0x453860 CMP %RCX,%RSI |
0x453863 JA 453e0b |
0x453869 MOV %RCX,%RAX |
0x45386c MOV %R15,%RDI |
0x45386f MOV (%R14),%R15 |
0x453872 MOV 0x10(%R14),%RCX |
0x453876 MOV %RCX,0x58(%RSP) |
0x45387b MOV (%RBX),%RDX |
0x45387e MOV 0x10(%RBX),%RCX |
0x453882 MOV %RCX,0x50(%RSP) |
0x453887 MOV 0x18(%RSP),%RCX |
0x45388c MOV 0x8(%RCX),%RCX |
0x453890 MOV %RCX,0x18(%RSP) |
0x453895 MOV 0x10(%RSP),%RCX |
0x45389a MOV 0x8(%RCX),%RCX |
0x45389e MOV %RCX,0x10(%RSP) |
0x4538a3 MOV (%RDI),%RBX |
0x4538a6 MOV 0x10(%RDI),%RCX |
0x4538aa MOV %RCX,0x48(%RSP) |
0x4538af MOV (%R12),%R14 |
0x4538b3 MOV 0x10(%R12),%RDI |
0x4538b8 LEA 0x1(%RSI),%RCX |
0x4538bc INC %RAX |
0x4538bf CMP %RAX,%RCX |
0x4538c2 MOV %RCX,0xa0(%RSP) |
0x4538ca CMOVG %RCX,%RAX |
0x4538ce MOV %RAX,0xb0(%RSP) |
0x4538d6 MOV %RAX,%RCX |
0x4538d9 SUB %RSI,%RCX |
0x4538dc MOV 0x20(%RSP),%RAX |
0x4538e1 SUB %EAX,%R13D |
0x4538e4 MOV $-0x4,%R12D |
0x4538ea AND %RCX,%R12 |
0x4538ed MOV %RDX,0xa8(%RSP) |
0x4538f5 MOV %RDI,0x40(%RSP) |
0x4538fa JE 453e2a |
0x453900 MOV %RCX,0x90(%RSP) |
0x453908 VPBROADCASTQ %RSI,%YMM0 |
0x45390e VPBROADCASTQ %R13,%YMM8 |
0x453914 MOV 0x38(%RSP),%RCX |
0x453919 VPBROADCASTD %ECX,%XMM1 |
0x45391f VMOVDQU %XMM1,0x80(%RSP) |
0x453928 VPBROADCASTQ %R15,%YMM12 |
0x45392e VPBROADCASTD %EAX,%XMM1 |
0x453934 VMOVDQU %XMM1,0x70(%RSP) |
0x45393a VPBROADCASTQ %RDX,%YMM14 |
0x453940 VPBROADCASTQ %RBX,%YMM15 |
0x453946 VPBROADCASTQ %R14,%YMM1 |
0x45394c VMOVDQU %YMM1,0xc0(%RSP) |
0x453955 VPBROADCASTQ %RDI,%YMM1 |
0x45395b VMOVDQU %YMM1,0x120(%RSP) |
0x453964 VMOVDQU64 0x12792(%RIP),%YMM18 |
0x45396e VMOVDQU %YMM0,0xe0(%RSP) |
0x453977 VPADDQ %YMM18,%YMM0,%YMM9 |
0x45397d INC %RAX |
0x453980 VPBROADCASTQ %RAX,%YMM0 |
0x453986 VMOVDQU %YMM0,0x100(%RSP) |
0x45398f XOR %EDI,%EDI |
0x453991 VXORPD %XMM20,%XMM20,%XMM20 |
0x453997 VPCMPEQD %XMM11,%XMM11,%XMM11 |
0x45399c VBROADCASTSD 0x12742(%RIP),%YMM22 |
0x4539a6 VBROADCASTSD 0x11d20(%RIP),%YMM23 |
0x4539b0 VBROADCASTSD 0x116ae(%RIP),%YMM25 |
0x4539ba VBROADCASTSD 0x11d24(%RIP),%YMM26 |
0x4539c4 VBROADCASTSD 0x128e2(%RIP),%YMM27 |
0x4539ce VPBROADCASTQ 0x12720(%RIP),%YMM28 |
0x4539d8 JMP 453ab7 |
0x4539dd NOPL (%RAX) |
(664) 0x4539e0 VPSLLQ $0x3,%YMM30,%YMM10 |
(664) 0x4539e7 VPADDQ 0x120(%RSP),%YMM10,%YMM10 |
(664) 0x4539f0 VANDPD %YMM26,%YMM5,%YMM13 |
(664) 0x4539f6 VMAXPD %YMM25,%YMM13,%YMM13 |
(664) 0x4539fc VCMPPD $0x1,%YMM20,%YMM5,%K2 |
(664) 0x453a03 VXORPD %YMM27,%YMM13,%YMM13{%K2} |
(664) 0x453a09 VANDPD %YMM26,%YMM6,%YMM5 |
(664) 0x453a0f VMAXPD %YMM25,%YMM5,%YMM5 |
(664) 0x453a15 VCMPPD $0x1,%YMM20,%YMM13,%K2 |
(664) 0x453a1c VXORPD %YMM27,%YMM5,%YMM5{%K2} |
(664) 0x453a22 VMULPD %YMM13,%YMM13,%YMM6 |
(664) 0x453a27 VFMADD231PD %YMM5,%YMM5,%YMM6 |
(664) 0x453a2c VSQRTPD %YMM6,%YMM6 |
(664) 0x453a30 VMULPD %YMM2,%YMM6,%YMM2 |
(664) 0x453a34 VDIVPD %YMM13,%YMM2,%YMM2 |
(664) 0x453a39 VANDPD %YMM26,%YMM2,%YMM2 |
(664) 0x453a3f VMULPD %YMM4,%YMM6,%YMM4 |
(664) 0x453a43 VPBROADCASTQ %XMM29,%YMM6 |
(664) 0x453a49 VPMULLQ %YMM0,%YMM6,%YMM0 |
(664) 0x453a4f VPBROADCASTQ %XMM7,%YMM6 |
(664) 0x453a54 VPADDQ %YMM1,%YMM0,%YMM0 |
(664) 0x453a58 VPSLLQ $0x3,%YMM0,%YMM0 |
(664) 0x453a5d VPADDQ %YMM0,%YMM6,%YMM0 |
(664) 0x453a61 VPXOR %XMM1,%XMM1,%XMM1 |
(664) 0x453a65 KMOVQ %K1,%K2 |
(664) 0x453a6a VGATHERQPD (,%YMM0,1),%YMM1{%K2} |
(664) 0x453a75 VDIVPD %YMM5,%YMM4,%YMM0 |
(664) 0x453a79 VANDPD %YMM26,%YMM0,%YMM0 |
(664) 0x453a7f VMINPD %YMM0,%YMM2,%YMM0 |
(664) 0x453a83 VMULPD %YMM3,%YMM0,%YMM0 |
(664) 0x453a87 VMULPD %YMM0,%YMM0,%YMM0 |
(664) 0x453a8b VADDPD %YMM1,%YMM1,%YMM1 |
(664) 0x453a8f VMULPD %YMM1,%YMM0,%YMM0 |
(664) 0x453a93 VSCATTERQPD %YMM0,(,%YMM10,1){%K1} |
(664) 0x453a9e VPADDQ %YMM28,%YMM9,%YMM9 |
(664) 0x453aa4 VPADDQ %YMM28,%YMM18,%YMM18 |
(664) 0x453aaa ADD $0x4,%RDI |
(664) 0x453aae CMP %R12,%RDI |
(664) 0x453ab1 JAE 453e01 |
(664) 0x453ab7 VPADDQ 0xe0(%RSP),%YMM18,%YMM0 |
(664) 0x453abf VMOVDQA %YMM8,%YMM1 |
(664) 0x453ac3 MOV $0x454460,%RAX |
(664) 0x453aca CALL %RAX |
(664) 0x453acc VPMOVQD %YMM0,%XMM1 |
(664) 0x453ad2 VPADDD 0x70(%RSP),%XMM1,%XMM29 |
(664) 0x453ada VPADDQ 0x100(%RSP),%YMM0,%YMM0 |
(664) 0x453ae3 VPSLLQ $0x20,%YMM0,%YMM0 |
(664) 0x453ae8 VPSRAQ $0x20,%YMM0,%YMM30 |
(664) 0x453aef VMOVDQA %YMM9,%YMM0 |
(664) 0x453af3 VMOVDQA %YMM8,%YMM1 |
(664) 0x453af7 CALL 454690 <__svml_u64div4_l9> |
(664) 0x453afd VPMOVQD %YMM0,%XMM0 |
(664) 0x453b03 VPADDD 0x80(%RSP),%XMM0,%XMM3 |
(664) 0x453b0c VPMOVSXDQ %XMM3,%YMM0 |
(664) 0x453b11 VXORPS %XMM2,%XMM2,%XMM2 |
(664) 0x453b15 VPMULLQ %YMM0,%YMM12,%YMM2 |
(664) 0x453b1b VPADDQ %YMM2,%YMM30,%YMM1 |
(664) 0x453b21 VXORPD %XMM4,%XMM4,%XMM4 |
(664) 0x453b25 KXNORW %K0,%K0,%K1 |
(664) 0x453b29 MOV 0x58(%RSP),%RAX |
(664) 0x453b2e VGATHERQPD (%RAX,%YMM1,8),%YMM4{%K1} |
(664) 0x453b35 VPSUBD %XMM11,%XMM3,%XMM1 |
(664) 0x453b3a VPMOVSXDQ %XMM1,%YMM5 |
(664) 0x453b3f VXORPS %XMM6,%XMM6,%XMM6 |
(664) 0x453b43 VPMULLQ %YMM5,%YMM12,%YMM6 |
(664) 0x453b49 VPADDQ %YMM6,%YMM30,%YMM1 |
(664) 0x453b4f VXORPD %XMM7,%XMM7,%XMM7 |
(664) 0x453b53 KXNORW %K0,%K0,%K1 |
(664) 0x453b57 VGATHERQPD (%RAX,%YMM1,8),%YMM7{%K1} |
(664) 0x453b5e VPMOVSXDQ %XMM29,%YMM1 |
(664) 0x453b64 VPADDQ %YMM1,%YMM2,%YMM2 |
(664) 0x453b68 VPXORD %XMM30,%XMM30,%XMM30 |
(664) 0x453b6e KXNORW %K0,%K0,%K1 |
(664) 0x453b72 VGATHERQPD (%RAX,%YMM2,8),%YMM30{%K1} |
(664) 0x453b79 VPADDQ %YMM1,%YMM6,%YMM2 |
(664) 0x453b7d VPXOR %XMM6,%XMM6,%XMM6 |
(664) 0x453b81 KXNORW %K0,%K0,%K1 |
(664) 0x453b85 VGATHERQPD (%RAX,%YMM2,8),%YMM6{%K1} |
(664) 0x453b8c VXORPS %XMM2,%XMM2,%XMM2 |
(664) 0x453b90 VPMULLQ %YMM5,%YMM14,%YMM2 |
(664) 0x453b96 VPADDQ %YMM1,%YMM2,%YMM31 |
(664) 0x453b9c VXORPD %XMM24,%XMM24,%XMM24 |
(664) 0x453ba2 KXNORW %K0,%K0,%K1 |
(664) 0x453ba6 MOV 0x50(%RSP),%RAX |
(664) 0x453bab VGATHERQPD (%RAX,%YMM31,8),%YMM24{%K1} |
(664) 0x453bb2 VPSUBD %XMM11,%XMM29,%XMM31 |
(664) 0x453bb8 VPMOVSXDQ %XMM31,%YMM21 |
(664) 0x453bbe VPADDQ %YMM21,%YMM2,%YMM2 |
(664) 0x453bc4 VXORPD %XMM13,%XMM13,%XMM13 |
(664) 0x453bc9 KXNORW %K0,%K0,%K1 |
(664) 0x453bcd VGATHERQPD (%RAX,%YMM2,8),%YMM13{%K1} |
(664) 0x453bd4 VXORPS %XMM2,%XMM2,%XMM2 |
(664) 0x453bd8 VPMULLQ %YMM0,%YMM14,%YMM2 |
(664) 0x453bde VPADDQ %YMM1,%YMM2,%YMM16 |
(664) 0x453be4 VXORPD %XMM17,%XMM17,%XMM17 |
(664) 0x453bea KXNORW %K0,%K0,%K1 |
(664) 0x453bee VGATHERQPD (%RAX,%YMM16,8),%YMM17{%K1} |
(664) 0x453bf5 VPADDQ %YMM21,%YMM2,%YMM2 |
(664) 0x453bfb VXORPD %XMM16,%XMM16,%XMM16 |
(664) 0x453c01 KXNORW %K0,%K0,%K1 |
(664) 0x453c05 VGATHERQPD (%RAX,%YMM2,8),%YMM16{%K1} |
(664) 0x453c0c VPMULLQ %YMM0,%YMM15,%YMM10 |
(664) 0x453c12 VPADDQ %YMM21,%YMM10,%YMM2 |
(664) 0x453c18 VPXORD %XMM21,%XMM21,%XMM21 |
(664) 0x453c1e KXNORW %K0,%K0,%K1 |
(664) 0x453c22 MOV 0x48(%RSP),%RDX |
(664) 0x453c27 VGATHERQPD (%RDX,%YMM2,8),%YMM21{%K1} |
(664) 0x453c2e VXORPD %XMM2,%XMM2,%XMM2 |
(664) 0x453c32 KXNORW %K0,%K0,%K1 |
(664) 0x453c36 MOV 0x18(%RSP),%RAX |
(664) 0x453c3b VGATHERDPD (%RAX,%XMM29,8),%YMM2{%K1} |
(664) 0x453c42 VSUBPD %YMM30,%YMM7,%YMM7 |
(664) 0x453c48 VSUBPD %YMM4,%YMM6,%YMM6 |
(664) 0x453c4c VSUBPD %YMM6,%YMM7,%YMM30 |
(664) 0x453c52 VXORPD %XMM4,%XMM4,%XMM4 |
(664) 0x453c56 KXNORW %K0,%K0,%K1 |
(664) 0x453c5a MOV 0x10(%RSP),%RCX |
(664) 0x453c5f VGATHERDPD (%RCX,%XMM3,8),%YMM4{%K1} |
(664) 0x453c66 VPADDD %XMM11,%XMM29,%XMM29 |
(664) 0x453c6c VPMOVSXDQ %XMM29,%YMM29 |
(664) 0x453c72 VPADDQ %YMM29,%YMM10,%YMM10 |
(664) 0x453c78 VPXORD %XMM29,%XMM29,%XMM29 |
(664) 0x453c7e KXNORW %K0,%K0,%K1 |
(664) 0x453c82 VGATHERQPD (%RDX,%YMM10,8),%YMM29{%K1} |
(664) 0x453c89 VSUBPD %YMM17,%YMM13,%YMM10 |
(664) 0x453c8f VSUBPD %YMM24,%YMM16,%YMM13 |
(664) 0x453c95 VSUBPD %YMM13,%YMM10,%YMM16 |
(664) 0x453c9b VXORPD %XMM17,%XMM17,%XMM17 |
(664) 0x453ca1 KXNORW %K0,%K0,%K1 |
(664) 0x453ca5 VGATHERDPD (%RAX,%XMM31,8),%YMM17{%K1} |
(664) 0x453cac VMULPD %YMM30,%YMM2,%YMM24 |
(664) 0x453cb2 VFMADD231PD %YMM16,%YMM4,%YMM24 |
(664) 0x453cb8 VADDPD %YMM7,%YMM6,%YMM6 |
(664) 0x453cbc VPMULLQ %YMM5,%YMM15,%YMM5 |
(664) 0x453cc2 VMULPD %YMM22,%YMM6,%YMM6 |
(664) 0x453cc8 VADDPD %YMM10,%YMM13,%YMM7 |
(664) 0x453ccd VPADDQ %YMM1,%YMM5,%YMM5 |
(664) 0x453cd1 VXORPD %XMM10,%XMM10,%XMM10 |
(664) 0x453cd6 KXNORW %K0,%K0,%K1 |
(664) 0x453cda VGATHERQPD (%RDX,%YMM5,8),%YMM10{%K1} |
(664) 0x453ce1 VDIVPD %YMM2,%YMM23,%YMM13 |
(664) 0x453ce7 VPADDD %XMM3,%XMM11,%XMM5 |
(664) 0x453ceb VPMOVSXDQ %XMM5,%YMM5 |
(664) 0x453cf0 VPMULLQ %YMM5,%YMM15,%YMM5 |
(664) 0x453cf6 VMULPD %YMM13,%YMM22,%YMM31 |
(664) 0x453cfc VMULPD %YMM31,%YMM7,%YMM7 |
(664) 0x453d02 VPADDQ %YMM1,%YMM5,%YMM5 |
(664) 0x453d06 VXORPD %XMM31,%XMM31,%XMM31 |
(664) 0x453d0c KXNORW %K0,%K0,%K1 |
(664) 0x453d10 VGATHERQPD (%RDX,%YMM5,8),%YMM31{%K1} |
(664) 0x453d17 VDIVPD %YMM4,%YMM23,%YMM19 |
(664) 0x453d1d VFMADD231PD %YMM6,%YMM19,%YMM7 |
(664) 0x453d23 VPADDD 0x14483(%RIP){1to4},%XMM3,%XMM3 |
(664) 0x453d2d VXORPD %XMM6,%XMM6,%XMM6 |
(664) 0x453d31 KXNORW %K0,%K0,%K1 |
(664) 0x453d35 VGATHERDPD (%RCX,%XMM3,8),%YMM6{%K1} |
(664) 0x453d3c VSUBPD %YMM29,%YMM21,%YMM3 |
(664) 0x453d42 VADDPD %YMM2,%YMM17,%YMM5 |
(664) 0x453d48 VDIVPD %YMM5,%YMM3,%YMM5 |
(664) 0x453d4c VSUBPD %YMM31,%YMM10,%YMM3 |
(664) 0x453d52 VADDPD %YMM4,%YMM6,%YMM6 |
(664) 0x453d56 VDIVPD %YMM6,%YMM3,%YMM6 |
(664) 0x453d5a VMULPD %YMM6,%YMM6,%YMM3 |
(664) 0x453d5e VMULPD %YMM7,%YMM6,%YMM7 |
(664) 0x453d62 VMULPD %YMM22,%YMM30,%YMM10 |
(664) 0x453d68 VMULPD %YMM5,%YMM13,%YMM13 |
(664) 0x453d6c VFMADD213PD %YMM7,%YMM10,%YMM13 |
(664) 0x453d71 VMULPD %YMM5,%YMM13,%YMM7 |
(664) 0x453d75 VMULPD %YMM22,%YMM16,%YMM10 |
(664) 0x453d7b VMULPD %YMM3,%YMM19,%YMM13 |
(664) 0x453d81 VFMADD213PD %YMM7,%YMM10,%YMM13 |
(664) 0x453d86 VFMADD231PD %YMM5,%YMM5,%YMM3 |
(664) 0x453d8b VMAXPD %YMM25,%YMM3,%YMM3 |
(664) 0x453d91 VDIVPD %YMM3,%YMM13,%YMM3 |
(664) 0x453d95 VCMPPD $0x2,%YMM20,%YMM3,%K1 |
(664) 0x453d9c VCMPPD $0x1,%YMM20,%YMM24,%K1{%K1} |
(664) 0x453da3 KNOTW %K1,%K2 |
(664) 0x453da7 VMOVDQU 0xc0(%RSP),%YMM7 |
(664) 0x453db0 VPMULLQ %YMM0,%YMM7,%YMM7 |
(664) 0x453db6 VPADDQ %YMM1,%YMM7,%YMM30 |
(664) 0x453dbc MOV 0x40(%RSP),%RAX |
(664) 0x453dc1 VSCATTERQPD %YMM20,(%RAX,%YMM30,8){%K2} |
(664) 0x453dc8 KMOVD %K1,%EAX |
(664) 0x453dcc TEST $0xf,%AL |
(664) 0x453dce JE 453dda |
(664) 0x453dd0 MOV 0x28(%RSP),%RCX |
(664) 0x453dd5 VMOVQ 0x10(%RCX),%XMM7 |
(664) 0x453dda TEST $0xf,%AL |
(664) 0x453ddc JE 4539e0 |
(664) 0x453de2 MOV 0x28(%RSP),%RAX |
(664) 0x453de7 VMOVQ (%RAX),%XMM29 |
(664) 0x453ded JMP 4539e0 |
0x453df2 LEA -0x28(%RBP),%RSP |
0x453df6 POP %RBX |
0x453df7 POP %R12 |
0x453df9 POP %R13 |
0x453dfb POP %R14 |
0x453dfd POP %R15 |
0x453dff POP %RBP |
0x453e00 RET |
0x453e01 CMP %R12,0x90(%RSP) |
0x453e09 JNE 453e32 |
0x453e0b MOV $0x6865d0,%EDI |
0x453e10 MOV 0x30(%RSP),%ESI |
0x453e14 LEA -0x28(%RBP),%RSP |
0x453e18 POP %RBX |
0x453e19 POP %R12 |
0x453e1b POP %R13 |
0x453e1d POP %R14 |
0x453e1f POP %R15 |
0x453e21 POP %RBP |
0x453e22 VZEROUPPER |
0x453e25 JMP 402e90 |
0x453e2a XOR %R12D,%R12D |
0x453e2d MOV %RSI,%RDI |
0x453e30 JMP 453e3b |
0x453e32 LEA (%RSI,%R12,1),%RDI |
0x453e36 MOV 0x20(%RSP),%RAX |
0x453e3b INC %RAX |
0x453e3e MOV %RAX,0xc0(%RSP) |
0x453e46 ADD %R12,%RSI |
0x453e49 VMOVSD 0x12297(%RIP),%XMM0 |
0x453e51 VMOVSD 0x11877(%RIP),%XMM1 |
0x453e59 VMOVSD 0x11207(%RIP),%XMM2 |
0x453e61 VXORPD %XMM3,%XMM3,%XMM3 |
0x453e65 VMOVDDUP 0x1187b(%RIP),%XMM4 |
0x453e6d VMOVDDUP 0x1243b(%RIP),%XMM5 |
0x453e75 MOV %R13,0xe0(%RSP) |
0x453e7d MOV %RBX,0x80(%RSP) |
0x453e85 MOV %R14,0x70(%RSP) |
0x453e8a MOV %R15,0x98(%RSP) |
0x453e92 MOV 0x38(%RSP),%R14 |
0x453e97 JMP 453ee6 |
0x453e99 NOPL (%RAX) |
(663) 0x453ea0 MOV 0x70(%RSP),%R8 |
(663) 0x453ea5 IMUL %R8,%RDX |
(663) 0x453ea9 ADD %RDI,%RDX |
(663) 0x453eac MOV 0x40(%RSP),%RAX |
(663) 0x453eb1 VMOVSD %XMM13,(%RAX,%RDX,8) |
(663) 0x453eb6 MOV 0xa0(%RSP),%RAX |
(663) 0x453ebe LEA (%RAX,%R12,1),%RDI |
(663) 0x453ec2 INC %R12 |
(663) 0x453ec5 INC %RSI |
(663) 0x453ec8 CMP %RSI,0xb0(%RSP) |
(663) 0x453ed0 MOV 0xe0(%RSP),%R13 |
(663) 0x453ed8 MOV 0x98(%RSP),%R15 |
(663) 0x453ee0 JE 453e0b |
(663) 0x453ee6 MOV %RSI,%RAX |
(663) 0x453ee9 SHR $0x20,%RAX |
(663) 0x453eed JE 453f10 |
(663) 0x453eef MOV %RSI,%RAX |
(663) 0x453ef2 CQTO |
(663) 0x453ef4 IDIV %R13 |
(663) 0x453ef7 MOV %RDX,%RCX |
(663) 0x453efa MOV %RDI,%RAX |
(663) 0x453efd SHR $0x20,%RAX |
(663) 0x453f01 JE 453f22 |
(663) 0x453f03 MOV %RDI,%RAX |
(663) 0x453f06 XOR %EDX,%EDX |
(663) 0x453f08 DIV %R13 |
(663) 0x453f0b JMP 453f29 |
0x453f0d NOPL (%RAX) |
(663) 0x453f10 MOV %ESI,%EAX |
(663) 0x453f12 XOR %EDX,%EDX |
(663) 0x453f14 DIV %R13D |
(663) 0x453f17 MOV %EDX,%ECX |
(663) 0x453f19 MOV %RDI,%RAX |
(663) 0x453f1c SHR $0x20,%RAX |
(663) 0x453f20 JNE 453f03 |
(663) 0x453f22 MOV %EDI,%EAX |
(663) 0x453f24 XOR %EDX,%EDX |
(663) 0x453f26 DIV %R13D |
(663) 0x453f29 MOV 0xc0(%RSP),%RDX |
(663) 0x453f31 ADD %ECX,%EDX |
(663) 0x453f33 MOVSXD %EDX,%RDI |
(663) 0x453f36 LEA (%R14,%RAX,1),%EDX |
(663) 0x453f3a MOVSXD %EDX,%RDX |
(663) 0x453f3d MOV %R15,%R10 |
(663) 0x453f40 IMUL %RDX,%R10 |
(663) 0x453f44 LEA (%RDI,%R10,1),%R11 |
(663) 0x453f48 LEA 0x1(%R14,%RAX,1),%R9D |
(663) 0x453f4d MOVSXD %R9D,%R9 |
(663) 0x453f50 MOV %R15,%R8 |
(663) 0x453f53 IMUL %R9,%R8 |
(663) 0x453f57 ADD %R8,%RDI |
(663) 0x453f5a MOV 0x58(%RSP),%R13 |
(663) 0x453f5f VMOVSD (%R13,%RDI,8),%XMM6 |
(663) 0x453f66 MOV 0x20(%RSP),%R15 |
(663) 0x453f6b LEA (%R15,%RCX,1),%EDI |
(663) 0x453f6f MOVSXD %EDI,%RDI |
(663) 0x453f72 ADD %RDI,%R10 |
(663) 0x453f75 ADD %RDI,%R8 |
(663) 0x453f78 VMOVSD (%R13,%R8,8),%XMM7 |
(663) 0x453f7f VSUBSD (%R13,%R10,8),%XMM6,%XMM6 |
(663) 0x453f86 VSUBSD (%R13,%R11,8),%XMM7,%XMM7 |
(663) 0x453f8d MOV 0xa8(%RSP),%R13 |
(663) 0x453f95 MOV %R13,%R8 |
(663) 0x453f98 IMUL %R9,%R8 |
(663) 0x453f9c LEA (%R8,%RDI,1),%R11 |
(663) 0x453fa0 LEA 0x1(%R15,%RCX,1),%R10D |
(663) 0x453fa5 MOVSXD %R10D,%R10 |
(663) 0x453fa8 ADD %R10,%R8 |
(663) 0x453fab MOV 0x50(%RSP),%RBX |
(663) 0x453fb0 VMOVSD (%RBX,%R8,8),%XMM8 |
(663) 0x453fb6 MOV %R13,%R8 |
(663) 0x453fb9 IMUL %RDX,%R8 |
(663) 0x453fbd LEA (%R8,%RDI,1),%R13 |
(663) 0x453fc1 ADD %R10,%R8 |
(663) 0x453fc4 VMOVSD (%RBX,%R8,8),%XMM9 |
(663) 0x453fca VSUBSD (%RBX,%R13,8),%XMM8,%XMM12 |
(663) 0x453fd0 VSUBSD (%RBX,%R11,8),%XMM9,%XMM13 |
(663) 0x453fd6 MOV 0x80(%RSP),%R11 |
(663) 0x453fde IMUL %R11,%R9 |
(663) 0x453fe2 ADD %RDI,%R9 |
(663) 0x453fe5 MOV 0x48(%RSP),%RBX |
(663) 0x453fea VMOVSD (%RBX,%R9,8),%XMM8 |
(663) 0x453ff0 MOV %R11,%R8 |
(663) 0x453ff3 IMUL %RDX,%R8 |
(663) 0x453ff7 LEA -0x1(%R15,%RCX,1),%ECX |
(663) 0x453ffc MOVSXD %ECX,%RCX |
(663) 0x453fff ADD %R8,%RCX |
(663) 0x454002 ADD %R10,%R8 |
(663) 0x454005 VMOVHPD (%RBX,%R8,8),%XMM8,%XMM14 |
(663) 0x45400b LEA -0x1(%R14,%RAX,1),%R8D |
(663) 0x454010 MOVSXD %R8D,%R8 |
(663) 0x454013 IMUL %R11,%R8 |
(663) 0x454017 ADD %RDI,%R8 |
(663) 0x45401a VMOVSD (%RBX,%R8,8),%XMM8 |
(663) 0x454020 VMOVHPD (%RBX,%RCX,8),%XMM8,%XMM15 |
(663) 0x454025 LEA 0x2(%R14,%RAX,1),%EAX |
(663) 0x45402a CLTQ |
(663) 0x45402c MOV 0x10(%RSP),%RCX |
(663) 0x454031 VMOVSD (%RCX,%RAX,8),%XMM8 |
(663) 0x454036 MOV 0x18(%RSP),%RAX |
(663) 0x45403b VMOVHPD (%RAX,%R10,8),%XMM8,%XMM16 |
(663) 0x454042 VSUBSD %XMM7,%XMM6,%XMM11 |
(663) 0x454046 VSUBSD %XMM13,%XMM12,%XMM10 |
(663) 0x45404b VMOVSD (%RAX,%RDI,8),%XMM9 |
(663) 0x454050 VMOVSD (%RCX,%RDX,8),%XMM8 |
(663) 0x454055 VADDSD %XMM6,%XMM7,%XMM6 |
(663) 0x454059 VMULSD %XMM0,%XMM6,%XMM6 |
(663) 0x45405d VADDSD %XMM12,%XMM13,%XMM7 |
(663) 0x454062 VDIVSD %XMM9,%XMM1,%XMM13 |
(663) 0x454067 VMULSD %XMM0,%XMM13,%XMM12 |
(663) 0x45406b VMULSD %XMM12,%XMM7,%XMM17 |
(663) 0x454071 VDIVSD %XMM8,%XMM1,%XMM18 |
(663) 0x454077 VFMADD231SD %XMM6,%XMM18,%XMM17 |
(663) 0x45407d VSUBPD %XMM15,%XMM14,%XMM6 |
(663) 0x454082 VPUNPCKLQDQ %XMM9,%XMM8,%XMM7 |
(663) 0x454087 VADDPD %XMM7,%XMM16,%XMM7 |
(663) 0x45408d VDIVPD %XMM7,%XMM6,%XMM7 |
(663) 0x454091 VMULPD %XMM7,%XMM7,%XMM6 |
(663) 0x454095 VSHUFPD $0x1,%XMM6,%XMM6,%XMM14 |
(663) 0x45409a VSHUFPD $0x1,%XMM7,%XMM7,%XMM12 |
(663) 0x45409f VMULSD %XMM17,%XMM12,%XMM15 |
(663) 0x4540a5 VMULSD %XMM0,%XMM11,%XMM16 |
(663) 0x4540ab VMULSD %XMM7,%XMM15,%XMM15 |
(663) 0x4540af VMULSD %XMM13,%XMM16,%XMM13 |
(663) 0x4540b5 VFMADD213SD %XMM15,%XMM14,%XMM13 |
(663) 0x4540ba VMULSD %XMM0,%XMM10,%XMM15 |
(663) 0x4540be VMULSD %XMM6,%XMM18,%XMM16 |
(663) 0x4540c4 VFMADD213SD %XMM13,%XMM15,%XMM16 |
(663) 0x4540ca VADDSD %XMM6,%XMM14,%XMM6 |
(663) 0x4540ce VMAXSD %XMM2,%XMM6,%XMM6 |
(663) 0x4540d2 VDIVSD %XMM6,%XMM16,%XMM6 |
(663) 0x4540d8 VXORPD %XMM13,%XMM13,%XMM13 |
(663) 0x4540dd VUCOMISD %XMM13,%XMM6 |
(663) 0x4540e2 JA 453ea0 |
(663) 0x4540e8 VMULSD %XMM11,%XMM9,%XMM11 |
(663) 0x4540ed VFMADD213SD %XMM11,%XMM8,%XMM10 |
(663) 0x4540f2 VUCOMISD %XMM3,%XMM10 |
(663) 0x4540f6 MOV 0x70(%RSP),%R8 |
(663) 0x4540fb JAE 453ea5 |
(663) 0x454101 VANDPD %XMM4,%XMM12,%XMM10 |
(663) 0x454105 VMAXSD %XMM2,%XMM10,%XMM10 |
(663) 0x454109 VXORPD %XMM5,%XMM10,%XMM11 |
(663) 0x45410d VCMPSD $0x1,%XMM3,%XMM12,%K1 |
(663) 0x454114 VMOVSD %XMM11,%XMM10,%XMM10{%K1} |
(663) 0x45411a VANDPD %XMM4,%XMM7,%XMM7 |
(663) 0x45411e VMAXSD %XMM2,%XMM7,%XMM7 |
(663) 0x454122 VXORPD %XMM5,%XMM7,%XMM11 |
(663) 0x454126 VCMPSD $0x1,%XMM3,%XMM10,%K1 |
(663) 0x45412d VMOVSD %XMM11,%XMM7,%XMM7{%K1} |
(663) 0x454133 VMULSD %XMM10,%XMM10,%XMM11 |
(663) 0x454138 VFMADD231SD %XMM7,%XMM7,%XMM11 |
(663) 0x45413d VSQRTSD %XMM11,%XMM11,%XMM11 |
(663) 0x454142 VMULSD %XMM9,%XMM11,%XMM9 |
(663) 0x454147 VDIVSD %XMM10,%XMM9,%XMM9 |
(663) 0x45414c VANDPD %XMM4,%XMM9,%XMM9 |
(663) 0x454150 VMULSD %XMM8,%XMM11,%XMM8 |
(663) 0x454155 VDIVSD %XMM7,%XMM8,%XMM7 |
(663) 0x454159 VANDPD %XMM4,%XMM7,%XMM7 |
(663) 0x45415d VMINSD %XMM7,%XMM9,%XMM7 |
(663) 0x454161 MOV 0x28(%RSP),%RCX |
(663) 0x454166 MOV 0x10(%RCX),%RAX |
(663) 0x45416a VMULSD %XMM6,%XMM7,%XMM6 |
(663) 0x45416e VMULSD %XMM6,%XMM6,%XMM6 |
(663) 0x454172 MOV (%RCX),%RCX |
(663) 0x454175 IMUL %RDX,%RCX |
(663) 0x454179 ADD %RDI,%RCX |
(663) 0x45417c VMOVSD (%RAX,%RCX,8),%XMM7 |
(663) 0x454181 VADDSD %XMM7,%XMM7,%XMM7 |
(663) 0x454185 VMULSD %XMM7,%XMM6,%XMM13 |
(663) 0x454189 JMP 453ea5 |
0x45418e XCHG %AX,%AX |
Path / |
Source file and lines | viscosity.cpp:36-66 |
Module | exec |
nb instructions | 157 |
nb uops | 159 |
loop length | 755 |
used x86 registers | 14 |
used mmx registers | 0 |
used xmm registers | 8 |
used ymm registers | 14 |
used zmm registers | 0 |
nb stack references | 34 |
micro-operation queue | 26.50 cycles |
front end | 26.50 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 5.60 | 5.60 | 17.67 | 17.67 | 20.50 | 10.00 | 5.40 | 20.50 | 20.50 | 20.50 | 5.40 | 17.67 |
cycles | 5.60 | 5.60 | 17.67 | 17.67 | 20.50 | 10.00 | 5.40 | 20.50 | 20.50 | 20.50 | 5.40 | 17.67 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 26.31 |
Stall cycles | 0.00 |
Front-end | 26.50 |
Dispatch | 20.50 |
Overall L1 | 26.50 |
all | 17% |
load | 14% |
store | 19% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 50% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 10% |
all | 16% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 28% |
all | 17% |
load | 5% |
store | 19% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 50% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 14% |
all | 16% |
load | 16% |
store | 17% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 28% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 14% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 16% |
all | 15% |
load | 14% |
store | 17% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 28% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 13% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x20,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0x160,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R8,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDX,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x48(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x38(%RBP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x30(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%RBP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RBP),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RBP),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x28(%RBP),%EDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RDX,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVL $0,0x34(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
TEST %RAX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JS 453df2 <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_.extracted+0x652> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R9,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R8,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RCX,0x10(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RSI,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVQ $0,0x68(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RAX,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVQ $0x1,0xb8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB $0x8,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0xc0(%RSP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x3c(%RSP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x70(%RSP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x68(%RSP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV $0x6865b0,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %ESI,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x22,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
CALL 403020 <__kmpc_for_static_init_8@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
ADD $0x20,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x68(%RSP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x60(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %RCX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JA 453e0b <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_.extracted+0x66b> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RCX,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R15,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV (%R14),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R14),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RBX),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RBX),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x18(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RCX),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RCX),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x10(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RDI),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RDI),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%R12),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R12),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x1(%RSI),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
INC %RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %RAX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RCX,0xa0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMOVG %RCX,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RAX,0xb0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RAX,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %RSI,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x20(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %EAX,%R13D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV $-0x4,%R12D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
AND %RCX,%R12 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %RDX,0xa8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDI,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JE 453e2a <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_.extracted+0x68a> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RCX,0x90(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VPBROADCASTQ %RSI,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R13,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV 0x38(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTD %ECX,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %XMM1,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VPBROADCASTQ %R15,%YMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTD %EAX,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %XMM1,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VPBROADCASTQ %RDX,%YMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %RBX,%YMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R14,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM1,0xc0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VPBROADCASTQ %RDI,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM1,0x120(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VMOVDQU64 0x12792(%RIP),%YMM18 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVDQU %YMM0,0xe0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VPADDQ %YMM18,%YMM0,%YMM9 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
INC %RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VPBROADCASTQ %RAX,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0x100(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
XOR %EDI,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VXORPD %XMM20,%XMM20,%XMM20 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPCMPEQD %XMM11,%XMM11,%XMM11 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VBROADCASTSD 0x12742(%RIP),%YMM22 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VBROADCASTSD 0x11d20(%RIP),%YMM23 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VBROADCASTSD 0x116ae(%RIP),%YMM25 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VBROADCASTSD 0x11d24(%RIP),%YMM26 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VBROADCASTSD 0x128e2(%RIP),%YMM27 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VPBROADCASTQ 0x12720(%RIP),%YMM28 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
JMP 453ab7 <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_.extracted+0x317> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
CMP %R12,0x90(%RSP) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JNE 453e32 <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_.extracted+0x692> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV $0x6865d0,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x30(%RSP),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
JMP 402e90 <__kmpc_for_static_fini@plt> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
XOR %R12D,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RSI,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
JMP 453e3b <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_.extracted+0x69b> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
LEA (%RSI,%R12,1),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x20(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
INC %RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RAX,0xc0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD %R12,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VMOVSD 0x12297(%RIP),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x11877(%RIP),%XMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x11207(%RIP),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VXORPD %XMM3,%XMM3,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVDDUP 0x1187b(%RIP),%XMM4 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVDDUP 0x1243b(%RIP),%XMM5 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R13,0xe0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RBX,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R14,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R15,0x98(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x38(%RSP),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JMP 453ee6 <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_.extracted+0x746> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | viscosity.cpp:36-66 |
Module | exec |
nb instructions | 157 |
nb uops | 159 |
loop length | 755 |
used x86 registers | 14 |
used mmx registers | 0 |
used xmm registers | 8 |
used ymm registers | 14 |
used zmm registers | 0 |
nb stack references | 34 |
micro-operation queue | 26.50 cycles |
front end | 26.50 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 5.60 | 5.60 | 17.67 | 17.67 | 20.50 | 10.00 | 5.40 | 20.50 | 20.50 | 20.50 | 5.40 | 17.67 |
cycles | 5.60 | 5.60 | 17.67 | 17.67 | 20.50 | 10.00 | 5.40 | 20.50 | 20.50 | 20.50 | 5.40 | 17.67 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 26.31 |
Stall cycles | 0.00 |
Front-end | 26.50 |
Dispatch | 20.50 |
Overall L1 | 26.50 |
all | 17% |
load | 14% |
store | 19% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 50% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 10% |
all | 16% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 28% |
all | 17% |
load | 5% |
store | 19% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 50% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 14% |
all | 16% |
load | 16% |
store | 17% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 28% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 12% |
all | 14% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 16% |
all | 15% |
load | 14% |
store | 17% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 28% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 13% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
AND $-0x20,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SUB $0x160,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R8,0x28(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDX,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x48(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x38(%RBP),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x30(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x20(%RBP),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RBP),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RBP),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x28(%RBP),%EDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RDX,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVL $0,0x34(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
TEST %RAX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JS 453df2 <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_.extracted+0x652> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R9,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R8,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RCX,0x10(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RSI,0x20(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RDI),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVQ $0,0x68(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RAX,0x60(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVQ $0x1,0xb8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB $0x8,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
LEA 0xc0(%RSP),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x3c(%RSP),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x70(%RSP),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA 0x68(%RSP),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV $0x6865b0,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %ESI,0x38(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x22,%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH $0x1 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RAX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
CALL 403020 <__kmpc_for_static_init_8@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
ADD $0x20,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x68(%RSP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x60(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %RCX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JA 453e0b <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_.extracted+0x66b> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RCX,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R15,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV (%R14),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R14),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x58(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RBX),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RBX),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x50(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x18(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RCX),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x18(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x10(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x8(%RCX),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x10(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%RDI),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%RDI),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %RCX,0x48(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%R12),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x10(%R12),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x1(%RSI),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
INC %RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %RAX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RCX,0xa0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMOVG %RCX,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
MOV %RAX,0xb0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RAX,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %RSI,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x20(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %EAX,%R13D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV $-0x4,%R12D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
AND %RCX,%R12 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %RDX,0xa8(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDI,0x40(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JE 453e2a <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_.extracted+0x68a> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RCX,0x90(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VPBROADCASTQ %RSI,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R13,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV 0x38(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VPBROADCASTD %ECX,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %XMM1,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VPBROADCASTQ %R15,%YMM12 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTD %EAX,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %XMM1,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VPBROADCASTQ %RDX,%YMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %RBX,%YMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPBROADCASTQ %R14,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM1,0xc0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VPBROADCASTQ %RDI,%YMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM1,0x120(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VMOVDQU64 0x12792(%RIP),%YMM18 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
VMOVDQU %YMM0,0xe0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
VPADDQ %YMM18,%YMM0,%YMM9 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
INC %RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VPBROADCASTQ %RAX,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VMOVDQU %YMM0,0x100(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
XOR %EDI,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VXORPD %XMM20,%XMM20,%XMM20 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPCMPEQD %XMM11,%XMM11,%XMM11 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
VBROADCASTSD 0x12742(%RIP),%YMM22 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VBROADCASTSD 0x11d20(%RIP),%YMM23 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VBROADCASTSD 0x116ae(%RIP),%YMM25 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VBROADCASTSD 0x11d24(%RIP),%YMM26 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VBROADCASTSD 0x128e2(%RIP),%YMM27 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
VPBROADCASTQ 0x12720(%RIP),%YMM28 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
JMP 453ab7 <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_.extracted+0x317> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
CMP %R12,0x90(%RSP) | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
JNE 453e32 <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_.extracted+0x692> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV $0x6865d0,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV 0x30(%RSP),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
JMP 402e90 <__kmpc_for_static_fini@plt> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
XOR %R12D,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RSI,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
JMP 453e3b <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_.extracted+0x69b> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
LEA (%RSI,%R12,1),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x20(%RSP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
INC %RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RAX,0xc0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD %R12,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VMOVSD 0x12297(%RIP),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x11877(%RIP),%XMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVSD 0x11207(%RIP),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VXORPD %XMM3,%XMM3,%XMM3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVDDUP 0x1187b(%RIP),%XMM4 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMOVDDUP 0x1243b(%RIP),%XMM5 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R13,0xe0(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RBX,0x80(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R14,0x70(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R15,0x98(%RSP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x38(%RSP),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JMP 453ee6 <_Z16viscosity_kerneliiiiRN6clover8Buffer1DIdEES2_RNS_8Buffer2DIdEES5_S5_S5_S5_.extracted+0x746> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼viscosity_kernel(int, int, int, int, clover::Buffer1D | 2.77 | 4.41 |
○Loop 664 - viscosity.cpp:37-66 - exec | 2.77 | 4.4 |
○Loop 663 - viscosity.cpp:37-66 - exec | 0 | 0 |