Function: hypre_CSRMatrixMultiply._omp_fn.0 | Module: exec | Source: csr_matop.c:185-302 [...] | Coverage: 1.36% |
---|
Function: hypre_CSRMatrixMultiply._omp_fn.0 | Module: exec | Source: csr_matop.c:185-302 [...] | Coverage: 1.36% |
---|
/home/kcamus/qaas_runs/169-443-9681/intel/AMG/build/AMG/AMG/seq_mv/csr_matop.c: 185 - 302 |
-------------------------------------------------------------------------------- |
185: #pragma omp parallel private(ia, ib, ic, ja, jb, num_nonzeros, row_start, counter, a_entry, b_entry) |
[...] |
192: ii = hypre_GetThreadNum(); |
193: num_threads = hypre_NumActiveThreads(); |
194: |
195: size = nrows_A/num_threads; |
196: rest = nrows_A - size*num_threads; |
197: if (ii < rest) |
198: { |
199: ns = ii*size+ii; |
200: ne = (ii+1)*size+ii+1; |
201: } |
202: else |
203: { |
204: ns = ii*size+rest; |
205: ne = (ii+1)*size+rest; |
206: } |
207: |
208: B_marker = hypre_CTAlloc(HYPRE_Int, ncols_B); |
209: |
210: for (ib = 0; ib < ncols_B; ib++) |
211: B_marker[ib] = -1; |
212: |
213: num_nonzeros = 0; |
214: for (ic = ns; ic < ne; ic++) |
215: { |
216: C_i[ic] = num_nonzeros; |
217: if (allsquare) |
218: { |
219: B_marker[ic] = ic; |
220: num_nonzeros++; |
221: } |
222: for (ia = A_i[ic]; ia < A_i[ic+1]; ia++) |
223: { |
224: ja = A_j[ia]; |
225: for (ib = B_i[ja]; ib < B_i[ja+1]; ib++) |
226: { |
227: jb = B_j[ib]; |
228: if (B_marker[jb] != ic) |
229: { |
230: B_marker[jb] = ic; |
231: num_nonzeros++; |
232: } |
233: } |
234: } |
235: } |
236: jj_count[ii] = num_nonzeros; |
237: |
238: #ifdef HYPRE_USING_OPENMP |
239: #pragma omp barrier |
240: #endif |
241: |
242: if (ii) |
243: { |
244: jj = jj_count[0]; |
245: for (i1 = 1; i1 < ii; i1++) |
246: jj += jj_count[i1]; |
247: |
248: for (i1 = ns; i1 < ne; i1++) |
249: C_i[i1] += jj; |
250: } |
251: else |
252: { |
253: C_i[nrows_A] = 0; |
254: for (i1 = 0; i1 < num_threads; i1++) |
255: C_i[nrows_A] += jj_count[i1]; |
256: |
257: C = hypre_CSRMatrixCreate(nrows_A, ncols_B, C_i[nrows_A]); |
258: hypre_CSRMatrixI(C) = C_i; |
259: hypre_CSRMatrixInitialize(C); |
260: C_j = hypre_CSRMatrixJ(C); |
261: C_data = hypre_CSRMatrixData(C); |
262: } |
263: |
264: #ifdef HYPRE_USING_OPENMP |
265: #pragma omp barrier |
266: #endif |
267: |
268: for (ib = 0; ib < ncols_B; ib++) |
269: B_marker[ib] = -1; |
270: |
271: counter = C_i[ns]; |
272: for (ic = ns; ic < ne; ic++) |
273: { |
274: row_start = C_i[ic]; |
275: if (allsquare) |
276: { |
277: B_marker[ic] = counter; |
278: C_data[counter] = 0; |
279: C_j[counter] = ic; |
280: counter++; |
281: } |
282: for (ia = A_i[ic]; ia < A_i[ic+1]; ia++) |
283: { |
284: ja = A_j[ia]; |
285: a_entry = A_data[ia]; |
286: for (ib = B_i[ja]; ib < B_i[ja+1]; ib++) |
287: { |
288: jb = B_j[ib]; |
289: b_entry = B_data[ib]; |
290: if (B_marker[jb] < row_start) |
291: { |
292: B_marker[jb] = counter; |
293: C_j[B_marker[jb]] = jb; |
294: C_data[B_marker[jb]] = a_entry*b_entry; |
295: counter++; |
296: } |
297: else |
298: C_data[B_marker[jb]] += a_entry*b_entry; |
299: } |
300: } |
301: } |
302: hypre_TFree(B_marker); |
0x596f90 PUSH %RBP |
0x596f91 MOV %RSP,%RBP |
0x596f94 PUSH %R15 |
0x596f96 PUSH %R14 |
0x596f98 PUSH %R13 |
0x596f9a PUSH %R12 |
0x596f9c PUSH %RBX |
0x596f9d MOV %RDI,%RBX |
0x596fa0 AND $-0x40,%RSP |
0x596fa4 ADD $-0x80,%RSP |
0x596fa8 MOV 0x68(%RDI),%RCX |
0x596fac MOV 0x50(%RDI),%R11 |
0x596fb0 MOV %RDI,0x30(%RSP) |
0x596fb5 MOV 0x38(%RDI),%RDX |
0x596fb9 MOV 0x28(%RDI),%RSI |
0x596fbd MOV 0x18(%RDI),%R8 |
0x596fc1 MOV 0x60(%RDI),%RAX |
0x596fc5 MOV %RCX,0x58(%RSP) |
0x596fca MOV (%RBX),%R9 |
0x596fcd MOV 0x30(%RDI),%R12 |
0x596fd1 MOV %R11,0x78(%RSP) |
0x596fd6 MOV 0x20(%RDI),%R13 |
0x596fda MOV 0x10(%RDI),%R15 |
0x596fde MOV %RDX,0x40(%RSP) |
0x596fe3 MOV 0x8(%RDI),%RDI |
0x596fe7 MOV %RSI,0x70(%RSP) |
0x596fec MOV %R8,0x38(%RSP) |
0x596ff1 MOV %RDI,0x68(%RSP) |
0x596ff6 MOV %R9,0x60(%RSP) |
0x596ffb MOV %RAX,0x48(%RSP) |
0x597000 CALL 5b5180 <hypre_GetThreadNum> |
0x597005 MOV %RAX,%R14 |
0x597008 CALL 5b5170 <hypre_NumActiveThreads> |
0x59700d MOV 0x78(%RSP),%R11 |
0x597012 MOV 0x58(%RSP),%RCX |
0x597017 MOV %RAX,%R10 |
0x59701a MOV %RAX,0x28(%RSP) |
0x59701f MOV 0x38(%RSP),%RAX |
0x597024 CQTO |
0x597026 IDIV %R10 |
0x597029 CMP %R14,%RDX |
0x59702c JLE 5976f7 |
0x597032 MOV %R14,%RDI |
0x597035 IMUL %RAX,%RDI |
0x597039 ADD %RDI,%RAX |
0x59703c LEA (%R14,%RDI,1),%R9 |
0x597040 LEA 0x1(%R14,%RAX,1),%R10 |
0x597045 MOV %R9,0x78(%RSP) |
0x59704a MOV %R10,0x50(%RSP) |
0x59704f MOV 0x40(%RSP),%RDI |
0x597054 MOV $0x8,%ESI |
0x597059 MOV %R11,0x20(%RSP) |
0x59705e MOV %RCX,0x58(%RSP) |
0x597063 CALL 5b1df0 <hypre_CAlloc> |
0x597068 CMPQ $0,0x40(%RSP) |
0x59706e MOV 0x58(%RSP),%RCX |
0x597073 MOV 0x20(%RSP),%R11 |
0x597078 MOV %RAX,%RBX |
0x59707b JLE 5970ab |
0x59707d MOV 0x40(%RSP),%RAX |
0x597082 MOV $0xff,%ESI |
0x597087 MOV %RBX,%RDI |
0x59708a MOV %R11,0x20(%RSP) |
0x59708f MOV %RCX,0x58(%RSP) |
0x597094 LEA (,%RAX,8),%RDX |
0x59709c CALL 40f0a0 <memset@plt> |
0x5970a1 MOV 0x58(%RSP),%RCX |
0x5970a6 MOV 0x20(%RSP),%R11 |
0x5970ab MOV 0x78(%RSP),%R8 |
0x5970b0 MOV 0x50(%RSP),%RDX |
0x5970b5 XOR %EDI,%EDI |
0x5970b7 MOV %R8,%RSI |
0x5970ba CMP %RDX,%R8 |
0x5970bd JGE 597172 |
0x5970c3 MOV %RCX,0x20(%RSP) |
0x5970c8 MOV %R13,%R8 |
0x5970cb MOV 0x70(%RSP),%R10 |
0x5970d0 MOV %R11,0x58(%RSP) |
0x5970d5 MOV 0x68(%RSP),%R13 |
0x5970da NOPW (%RAX,%RAX,1) |
(2997) 0x5970e0 MOV 0x58(%RSP),%R9 |
(2997) 0x5970e5 CMPQ $0,0x48(%RSP) |
(2997) 0x5970eb MOV %RDI,(%R9,%RSI,8) |
(2997) 0x5970ef JE 5970f8 |
(2997) 0x5970f1 MOV %RSI,(%RBX,%RSI,8) |
(2997) 0x5970f5 INC %RDI |
(2997) 0x5970f8 MOV (%R13,%RSI,8),%R9 |
(2997) 0x5970fd MOV 0x8(%R13,%RSI,8),%RDX |
(2997) 0x597102 CMP %RDX,%R9 |
(2997) 0x597105 JGE 597157 |
(2997) 0x597107 NOPW (%RAX,%RAX,1) |
(2998) 0x597110 MOV (%R15,%R9,8),%RCX |
(2998) 0x597114 LEA 0x8(%R10,%RCX,8),%R11 |
(2998) 0x597119 MOV (%R10,%RCX,8),%RAX |
(2998) 0x59711d MOV (%R11),%RCX |
(2998) 0x597120 CMP %RCX,%RAX |
(2998) 0x597123 JGE 59714f |
(2998) 0x597125 NOPL (%RAX) |
(2999) 0x597128 MOV (%R12,%RAX,8),%RDX |
(2999) 0x59712c LEA (%RBX,%RDX,8),%RDX |
(2999) 0x597130 CMP (%RDX),%RSI |
(2999) 0x597133 JE 5976b8 |
(2999) 0x597139 MOV %RSI,(%RDX) |
(2999) 0x59713c INC %RAX |
(2999) 0x59713f INC %RDI |
(2999) 0x597142 MOV (%R11),%RCX |
(2999) 0x597145 CMP %RAX,%RCX |
(2999) 0x597148 JG 597128 |
(2998) 0x59714a MOV 0x8(%R13,%RSI,8),%RDX |
(2998) 0x59714f INC %R9 |
(2998) 0x597152 CMP %RDX,%R9 |
(2998) 0x597155 JL 597110 |
(2997) 0x597157 INC %RSI |
(2997) 0x59715a CMP %RSI,0x50(%RSP) |
(2997) 0x59715f JNE 5970e0 |
0x597165 MOV 0x20(%RSP),%RCX |
0x59716a MOV 0x58(%RSP),%R11 |
0x59716f MOV %R8,%R13 |
0x597172 MOV %RDI,(%RCX,%R14,8) |
0x597176 MOV %R11,0x20(%RSP) |
0x59717b MOV %RCX,0x58(%RSP) |
0x597180 CALL 40f290 <GOMP_barrier@plt> |
0x597185 TEST %R14,%R14 |
0x597188 MOV 0x58(%RSP),%R10 |
0x59718d MOV 0x20(%RSP),%R11 |
0x597192 JE 597718 |
0x597198 MOV (%R10),%R8 |
0x59719b CMP $0x1,%R14 |
0x59719f JLE 597861 |
0x5971a5 LEA -0x2(%R14),%RDI |
0x5971a9 LEA -0x1(%R14),%R9 |
0x5971ad CMP $0x6,%RDI |
0x5971b1 JBE 59788c |
0x5971b7 MOV %R9,%RDI |
0x5971ba MOV %R10,%RAX |
0x5971bd VPXOR %XMM2,%XMM2,%XMM2 |
0x5971c1 SHR $0x3,%RDI |
0x5971c5 SAL $0x6,%RDI |
0x5971c9 LEA (%RDI,%R10,1),%RSI |
0x5971cd SUB $0x40,%RDI |
0x5971d1 SHR $0x6,%RDI |
0x5971d5 INC %RDI |
0x5971d8 AND $0x7,%EDI |
0x5971db JE 59726c |
0x5971e1 CMP $0x1,%RDI |
0x5971e5 JE 597259 |
0x5971e7 CMP $0x2,%RDI |
0x5971eb JE 59724b |
0x5971ed CMP $0x3,%RDI |
0x5971f1 JE 59723d |
0x5971f3 CMP $0x4,%RDI |
0x5971f7 JE 59722f |
0x5971f9 CMP $0x5,%RDI |
0x5971fd JE 597221 |
0x5971ff CMP $0x6,%RDI |
0x597203 JE 597213 |
0x597205 VMOVDQU64 0x8(%R10),%ZMM2 |
0x59720f LEA 0x40(%R10),%RAX |
0x597213 VPADDQ 0x8(%RAX),%ZMM2,%ZMM2 |
0x59721d ADD $0x40,%RAX |
0x597221 VPADDQ 0x8(%RAX),%ZMM2,%ZMM2 |
0x59722b ADD $0x40,%RAX |
0x59722f VPADDQ 0x8(%RAX),%ZMM2,%ZMM2 |
0x597239 ADD $0x40,%RAX |
0x59723d VPADDQ 0x8(%RAX),%ZMM2,%ZMM2 |
0x597247 ADD $0x40,%RAX |
0x59724b VPADDQ 0x8(%RAX),%ZMM2,%ZMM2 |
0x597255 ADD $0x40,%RAX |
0x597259 VPADDQ 0x8(%RAX),%ZMM2,%ZMM2 |
0x597263 ADD $0x40,%RAX |
0x597267 CMP %RSI,%RAX |
0x59726a JE 5972c7 |
(2996) 0x59726c VPADDQ 0x8(%RAX),%ZMM2,%ZMM1 |
(2996) 0x597276 ADD $0x200,%RAX |
(2996) 0x59727c VPADDQ -0x1b8(%RAX),%ZMM1,%ZMM3 |
(2996) 0x597286 VPADDQ -0x178(%RAX),%ZMM3,%ZMM4 |
(2996) 0x597290 VPADDQ -0x138(%RAX),%ZMM4,%ZMM5 |
(2996) 0x59729a VPADDQ -0xf8(%RAX),%ZMM5,%ZMM6 |
(2996) 0x5972a4 VPADDQ -0xb8(%RAX),%ZMM6,%ZMM7 |
(2996) 0x5972ae VPADDQ -0x78(%RAX),%ZMM7,%ZMM8 |
(2996) 0x5972b8 VPADDQ -0x38(%RAX),%ZMM8,%ZMM2 |
(2996) 0x5972c2 CMP %RSI,%RAX |
(2996) 0x5972c5 JNE 59726c |
0x5972c7 VEXTRACTI64X4 $0x1,%ZMM2,%YMM9 |
0x5972ce MOV %R9,%RCX |
0x5972d1 VPADDQ %YMM2,%YMM9,%YMM0 |
0x5972d5 AND $-0x8,%RCX |
0x5972d9 VEXTRACTI64X2 $0x1,%YMM0,%XMM10 |
0x5972e0 LEA 0x1(%RCX),%RDX |
0x5972e4 VPADDQ %XMM0,%XMM10,%XMM12 |
0x5972e8 VPSRLDQ $0x8,%XMM12,%XMM13 |
0x5972ee VPADDQ %XMM13,%XMM12,%XMM14 |
0x5972f3 VMOVQ %XMM14,%RAX |
0x5972f8 ADD %R8,%RAX |
0x5972fb AND $0x7,%R9D |
0x5972ff JE 597370 |
0x597301 MOV %R14,%RDI |
0x597304 SUB %RCX,%RDI |
0x597307 LEA -0x1(%RDI),%R9 |
0x59730b SUB $0x2,%RDI |
0x59730f CMP $0x2,%RDI |
0x597313 JBE 597348 |
0x597315 VPADDQ 0x8(%R10,%RCX,8),%YMM0,%YMM15 |
0x59731c VEXTRACTI64X2 $0x1,%YMM15,%XMM0 |
0x597323 VPADDQ %XMM0,%XMM15,%XMM3 |
0x597327 VPSRLDQ $0x8,%XMM3,%XMM1 |
0x59732c VPADDQ %XMM1,%XMM3,%XMM4 |
0x597330 VMOVQ %XMM4,%RAX |
0x597335 ADD %R8,%RAX |
0x597338 MOV %R9,%R8 |
0x59733b AND $-0x4,%R8 |
0x59733f ADD %R8,%RDX |
0x597342 AND $0x3,%R9D |
0x597346 JE 597370 |
0x597348 LEA 0x1(%RDX),%RSI |
0x59734c LEA (,%RDX,8),%RCX |
0x597354 ADD (%R10,%RDX,8),%RAX |
0x597358 CMP %R14,%RSI |
0x59735b JGE 597370 |
0x59735d ADD $0x2,%RDX |
0x597361 ADD 0x8(%R10,%RCX,1),%RAX |
0x597366 CMP %RDX,%R14 |
0x597369 JLE 597370 |
0x59736b ADD 0x10(%R10,%RCX,1),%RAX |
0x597370 MOV 0x78(%RSP),%R14 |
0x597375 MOV 0x50(%RSP),%R10 |
0x59737a CMP %R10,%R14 |
0x59737d JGE 597869 |
0x597383 SUB %R14,%R10 |
0x597386 LEA -0x1(%R10),%RDX |
0x59738a CMP $0x6,%RDX |
0x59738e JBE 59789f |
0x597394 MOV %R10,%RCX |
0x597397 LEA (%R11,%R14,8),%RSI |
0x59739b VPBROADCASTQ %RAX,%ZMM5 |
0x5973a1 SHR $0x3,%RCX |
0x5973a5 SAL $0x6,%RCX |
0x5973a9 LEA (%RCX,%RSI,1),%RDI |
0x5973ad SUB $0x40,%RCX |
0x5973b1 SHR $0x6,%RCX |
0x5973b5 INC %RCX |
0x5973b8 AND $0x7,%ECX |
0x5973bb JE 597465 |
0x5973c1 CMP $0x1,%RCX |
0x5973c5 JE 59744f |
0x5973cb CMP $0x2,%RCX |
0x5973cf JE 59743e |
0x5973d1 CMP $0x3,%RCX |
0x5973d5 JE 59742d |
0x5973d7 CMP $0x4,%RCX |
0x5973db JE 59741c |
0x5973dd CMP $0x5,%RCX |
0x5973e1 JE 59740b |
0x5973e3 CMP $0x6,%RCX |
0x5973e7 JE 5973fa |
0x5973e9 VPADDQ (%RSI),%ZMM5,%ZMM6 |
0x5973ef ADD $0x40,%RSI |
0x5973f3 VMOVDQU64 %ZMM6,-0x40(%RSI) |
0x5973fa VPADDQ (%RSI),%ZMM5,%ZMM7 |
0x597400 ADD $0x40,%RSI |
0x597404 VMOVDQU64 %ZMM7,-0x40(%RSI) |
0x59740b VPADDQ (%RSI),%ZMM5,%ZMM8 |
0x597411 ADD $0x40,%RSI |
0x597415 VMOVDQU64 %ZMM8,-0x40(%RSI) |
0x59741c VPADDQ (%RSI),%ZMM5,%ZMM9 |
0x597422 ADD $0x40,%RSI |
0x597426 VMOVDQU64 %ZMM9,-0x40(%RSI) |
0x59742d VPADDQ (%RSI),%ZMM5,%ZMM10 |
0x597433 ADD $0x40,%RSI |
0x597437 VMOVDQU64 %ZMM10,-0x40(%RSI) |
0x59743e VPADDQ (%RSI),%ZMM5,%ZMM11 |
0x597444 ADD $0x40,%RSI |
0x597448 VMOVDQU64 %ZMM11,-0x40(%RSI) |
0x59744f VPADDQ (%RSI),%ZMM5,%ZMM12 |
0x597455 ADD $0x40,%RSI |
0x597459 VMOVDQU64 %ZMM12,-0x40(%RSI) |
0x597460 CMP %RSI,%RDI |
0x597463 JE 5974e0 |
(2995) 0x597465 VPADDQ (%RSI),%ZMM5,%ZMM13 |
(2995) 0x59746b VPADDQ 0x40(%RSI),%ZMM5,%ZMM14 |
(2995) 0x597472 ADD $0x200,%RSI |
(2995) 0x597479 VPADDQ -0x180(%RSI),%ZMM5,%ZMM15 |
(2995) 0x597480 VPADDQ -0x140(%RSI),%ZMM5,%ZMM2 |
(2995) 0x597487 VPADDQ -0x100(%RSI),%ZMM5,%ZMM0 |
(2995) 0x59748e VPADDQ -0xc0(%RSI),%ZMM5,%ZMM3 |
(2995) 0x597495 VMOVDQU64 %ZMM13,-0x200(%RSI) |
(2995) 0x59749c VPADDQ -0x80(%RSI),%ZMM5,%ZMM1 |
(2995) 0x5974a3 VPADDQ -0x40(%RSI),%ZMM5,%ZMM4 |
(2995) 0x5974aa VMOVDQU64 %ZMM14,-0x1c0(%RSI) |
(2995) 0x5974b1 VMOVDQU64 %ZMM15,-0x180(%RSI) |
(2995) 0x5974b8 VMOVDQU64 %ZMM2,-0x140(%RSI) |
(2995) 0x5974bf VMOVDQU64 %ZMM0,-0x100(%RSI) |
(2995) 0x5974c6 VMOVDQU64 %ZMM3,-0xc0(%RSI) |
(2995) 0x5974cd VMOVDQU64 %ZMM1,-0x80(%RSI) |
(2995) 0x5974d4 VMOVDQU64 %ZMM4,-0x40(%RSI) |
(2995) 0x5974db CMP %RSI,%RDI |
(2995) 0x5974de JNE 597465 |
0x5974e0 MOV 0x78(%RSP),%R14 |
0x5974e5 MOV %R10,%R8 |
0x5974e8 AND $-0x8,%R8 |
0x5974ec LEA (%R8,%R14,1),%R9 |
0x5974f0 TEST $0x7,%R10B |
0x5974f4 JE 59755c |
0x5974f6 SUB %R8,%R10 |
0x5974f9 LEA -0x1(%R10),%RDX |
0x5974fd CMP $0x2,%RDX |
0x597501 JBE 59752f |
0x597503 MOV 0x78(%RSP),%RSI |
0x597508 MOV %R10,%RCX |
0x59750b VPBROADCASTQ %RAX,%YMM5 |
0x597511 AND $-0x4,%RCX |
0x597515 ADD %RSI,%R8 |
0x597518 ADD %RCX,%R9 |
0x59751b AND $0x3,%R10D |
0x59751f LEA (%R11,%R8,8),%R8 |
0x597523 VPADDQ (%R8),%YMM5,%YMM6 |
0x597528 VMOVDQU %YMM6,(%R8) |
0x59752d JE 59755c |
0x59752f MOV 0x50(%RSP),%RDI |
0x597534 LEA (,%R9,8),%R14 |
0x59753c LEA 0x1(%R9),%R10 |
0x597540 ADD %RAX,(%R11,%R14,1) |
0x597544 CMP %R10,%RDI |
0x597547 JLE 59755c |
0x597549 ADD $0x2,%R9 |
0x59754d ADD %RAX,0x8(%R11,%R14,1) |
0x597552 CMP %R9,%RDI |
0x597555 JLE 59755c |
0x597557 ADD %RAX,0x10(%R11,%R14,1) |
0x59755c MOV %R11,0x58(%RSP) |
0x597561 VZEROUPPER |
0x597564 CALL 40f290 <GOMP_barrier@plt> |
0x597569 CMPQ $0,0x40(%RSP) |
0x59756f MOV 0x58(%RSP),%R11 |
0x597574 JLE 5978aa |
0x59757a MOV 0x40(%RSP),%RDX |
0x59757f MOV $0xff,%ESI |
0x597584 MOV %RBX,%RDI |
0x597587 MOV %R11,0x58(%RSP) |
0x59758c SAL $0x3,%RDX |
0x597590 CALL 40f0a0 <memset@plt> |
0x597595 MOV 0x58(%RSP),%R11 |
0x59759a MOV 0x78(%RSP),%R9 |
0x59759f MOV 0x50(%RSP),%RDX |
0x5975a4 MOV (%R11,%R9,8),%RDI |
0x5975a8 CMP %RDX,%R9 |
0x5975ab JGE 59769f |
0x5975b1 MOV 0x30(%RSP),%RSI |
0x5975b6 MOV %R11,0x40(%RSP) |
0x5975bb MOV %R15,0x58(%RSP) |
0x5975c0 MOV 0x48(%RSI),%R10 |
0x5975c4 MOV 0x58(%RSI),%R14 |
0x5975c8 NOPL (%RAX,%RAX,1) |
(2991) 0x5975d0 MOV 0x40(%RSP),%R15 |
(2991) 0x5975d5 MOV 0x78(%RSP),%R11 |
(2991) 0x5975da CMPQ $0,0x48(%RSP) |
(2991) 0x5975e0 MOV (%R15,%R11,8),%R9 |
(2991) 0x5975e4 JE 5975f9 |
(2991) 0x5975e6 MOV %RDI,(%RBX,%R11,8) |
(2991) 0x5975ea MOVQ $0,(%R10,%RDI,8) |
(2991) 0x5975f2 MOV %R11,(%R14,%RDI,8) |
(2991) 0x5975f6 INC %RDI |
(2991) 0x5975f9 MOV 0x68(%RSP),%R8 |
(2991) 0x5975fe MOV 0x78(%RSP),%RAX |
(2991) 0x597603 MOV (%R8,%RAX,8),%RCX |
(2991) 0x597607 MOV 0x8(%R8,%RAX,8),%RDX |
(2991) 0x59760c CMP %RDX,%RCX |
(2991) 0x59760f JGE 59768a |
(2991) 0x597611 NOPL (%RAX) |
(2992) 0x597618 MOV 0x58(%RSP),%R15 |
(2992) 0x59761d MOV 0x70(%RSP),%R8 |
(2992) 0x597622 MOV 0x60(%RSP),%RSI |
(2992) 0x597627 MOV (%R15,%RCX,8),%R11 |
(2992) 0x59762b VMOVSD (%RSI,%RCX,8),%XMM7 |
(2992) 0x597630 LEA 0x8(%R8,%R11,8),%R15 |
(2992) 0x597635 MOV (%R8,%R11,8),%RAX |
(2992) 0x597639 MOV (%R15),%R11 |
(2992) 0x59763c CMP %R11,%RAX |
(2992) 0x59763f JGE 597682 |
(2992) 0x597641 NOPL (%RAX) |
(2993) 0x597648 MOV (%R12,%RAX,8),%RSI |
(2993) 0x59764c VMULSD (%R13,%RAX,8),%XMM7,%XMM8 |
(2993) 0x597653 LEA (%RBX,%RSI,8),%RDX |
(2993) 0x597657 MOV (%RDX),%R8 |
(2993) 0x59765a CMP %R8,%R9 |
(2993) 0x59765d JG 5976d0 |
(2993) 0x59765f LEA (%R10,%R8,8),%RSI |
(2993) 0x597663 INC %RAX |
(2993) 0x597666 VADDSD (%RSI),%XMM8,%XMM9 |
(2993) 0x59766a VMOVSD %XMM9,(%RSI) |
(2993) 0x59766e CMP %RAX,%R11 |
(2993) 0x597671 JG 597648 |
(2992) 0x597673 MOV 0x68(%RSP),%RAX |
(2992) 0x597678 MOV 0x78(%RSP),%R15 |
(2992) 0x59767d MOV 0x8(%RAX,%R15,8),%RDX |
(2992) 0x597682 INC %RCX |
(2992) 0x597685 CMP %RDX,%RCX |
(2992) 0x597688 JL 597618 |
(2991) 0x59768a INCQ 0x78(%RSP) |
(2991) 0x59768f MOV 0x78(%RSP),%R9 |
(2991) 0x597694 CMP %R9,0x50(%RSP) |
(2991) 0x597699 JNE 5975d0 |
0x59769f LEA -0x28(%RBP),%RSP |
0x5976a3 MOV %RBX,%RDI |
0x5976a6 POP %RBX |
0x5976a7 POP %R12 |
0x5976a9 POP %R13 |
0x5976ab POP %R14 |
0x5976ad POP %R15 |
0x5976af POP %RBP |
0x5976b0 JMP 5b1eb0 |
0x5976b5 NOPL (%RAX) |
(2999) 0x5976b8 INC %RAX |
(2999) 0x5976bb CMP %RCX,%RAX |
(2999) 0x5976be JL 597128 |
(2998) 0x5976c4 JMP 59714a |
0x5976c9 NOPL (%RAX) |
(2993) 0x5976d0 MOV %RDI,(%RDX) |
(2993) 0x5976d3 INC %RAX |
(2993) 0x5976d6 MOV %RSI,(%R14,%RDI,8) |
(2993) 0x5976da INC %RDI |
(2993) 0x5976dd MOV (%RDX),%R11 |
(2993) 0x5976e0 VMOVSD %XMM8,(%R10,%R11,8) |
(2993) 0x5976e6 MOV (%R15),%R11 |
(2993) 0x5976e9 CMP %R11,%RAX |
(2993) 0x5976ec JL 597648 |
(2992) 0x5976f2 JMP 597673 |
0x5976f7 MOV %R14,%RSI |
0x5976fa IMUL %RAX,%RSI |
0x5976fe LEA (%RDX,%RSI,1),%RBX |
0x597702 ADD %RAX,%RSI |
0x597705 LEA (%RSI,%RDX,1),%R8 |
0x597709 MOV %RBX,0x78(%RSP) |
0x59770e MOV %R8,0x50(%RSP) |
0x597713 JMP 59704f |
0x597718 MOV 0x38(%RSP),%RSI |
0x59771d MOV 0x28(%RSP),%RDI |
0x597722 MOV %R10,%R9 |
0x597725 LEA (%R11,%RSI,8),%R8 |
0x597729 LEA (%R10,%RDI,8),%RAX |
0x59772d MOVQ $0,(%R8) |
0x597734 TEST %RDI,%RDI |
0x597737 JLE 597802 |
0x59773d MOV %RAX,%RCX |
0x597740 SUB %R10,%RCX |
0x597743 SUB $0x8,%RCX |
0x597747 SHR $0x3,%RCX |
0x59774b INC %RCX |
0x59774e AND $0x7,%ECX |
0x597751 JE 5977c2 |
0x597753 CMP $0x1,%RCX |
0x597757 JE 5977b3 |
0x597759 CMP $0x2,%RCX |
0x59775d JE 5977a9 |
0x59775f CMP $0x3,%RCX |
0x597763 JE 59779f |
0x597765 CMP $0x4,%RCX |
0x597769 JE 597795 |
0x59776b CMP $0x5,%RCX |
0x59776f JE 59778b |
0x597771 CMP $0x6,%RCX |
0x597775 JE 597781 |
0x597777 MOV (%R10),%R14 |
0x59777a ADD $0x8,%R9 |
0x59777e MOV %R14,(%R8) |
0x597781 ADD (%R9),%R14 |
0x597784 ADD $0x8,%R9 |
0x597788 MOV %R14,(%R8) |
0x59778b ADD (%R9),%R14 |
0x59778e ADD $0x8,%R9 |
0x597792 MOV %R14,(%R8) |
0x597795 ADD (%R9),%R14 |
0x597798 ADD $0x8,%R9 |
0x59779c MOV %R14,(%R8) |
0x59779f ADD (%R9),%R14 |
0x5977a2 ADD $0x8,%R9 |
0x5977a6 MOV %R14,(%R8) |
0x5977a9 ADD (%R9),%R14 |
0x5977ac ADD $0x8,%R9 |
0x5977b0 MOV %R14,(%R8) |
0x5977b3 ADD (%R9),%R14 |
0x5977b6 ADD $0x8,%R9 |
0x5977ba MOV %R14,(%R8) |
0x5977bd CMP %RAX,%R9 |
0x5977c0 JE 597802 |
(2994) 0x5977c2 ADD (%R9),%R14 |
(2994) 0x5977c5 ADD $0x40,%R9 |
(2994) 0x5977c9 MOV %R14,(%R8) |
(2994) 0x5977cc ADD -0x38(%R9),%R14 |
(2994) 0x5977d0 MOV %R14,(%R8) |
(2994) 0x5977d3 ADD -0x30(%R9),%R14 |
(2994) 0x5977d7 MOV %R14,(%R8) |
(2994) 0x5977da ADD -0x28(%R9),%R14 |
(2994) 0x5977de MOV %R14,(%R8) |
(2994) 0x5977e1 ADD -0x20(%R9),%R14 |
(2994) 0x5977e5 MOV %R14,(%R8) |
(2994) 0x5977e8 ADD -0x18(%R9),%R14 |
(2994) 0x5977ec MOV %R14,(%R8) |
(2994) 0x5977ef ADD -0x10(%R9),%R14 |
(2994) 0x5977f3 MOV %R14,(%R8) |
(2994) 0x5977f6 ADD -0x8(%R9),%R14 |
(2994) 0x5977fa MOV %R14,(%R8) |
(2994) 0x5977fd CMP %RAX,%R9 |
(2994) 0x597800 JNE 5977c2 |
0x597802 MOV 0x40(%RSP),%RSI |
0x597807 MOV 0x38(%RSP),%RDI |
0x59780c MOV %R14,%RDX |
0x59780f MOV %R11,0x58(%RSP) |
0x597814 CALL 599ff0 <hypre_CSRMatrixCreate> |
0x597819 MOV 0x30(%RSP),%R14 |
0x59781e MOV 0x58(%RSP),%RDX |
0x597823 MOV %RAX,%RDI |
0x597826 MOV %RDX,(%RAX) |
0x597829 MOV %RAX,0x40(%R14) |
0x59782d CALL 59a100 <hypre_CSRMatrixInitialize> |
0x597832 MOV 0x40(%R14),%R10 |
0x597836 MOV 0x8(%R10),%R11 |
0x59783a MOV 0x30(%R10),%RSI |
0x59783e MOV %R11,0x58(%R14) |
0x597842 MOV %RSI,0x48(%R14) |
0x597846 CALL 40f290 <GOMP_barrier@plt> |
0x59784b CMPQ $0,0x40(%RSP) |
0x597851 MOV 0x58(%RSP),%R11 |
0x597856 JG 59757a |
0x59785c JMP 59759a |
0x597861 MOV %R8,%RAX |
0x597864 JMP 597370 |
0x597869 MOV %R11,0x58(%RSP) |
0x59786e VZEROUPPER |
0x597871 CALL 40f290 <GOMP_barrier@plt> |
0x597876 CMPQ $0,0x40(%RSP) |
0x59787c MOV 0x58(%RSP),%R11 |
0x597881 JG 59757a |
0x597887 JMP 59769f |
0x59788c MOV %R8,%RAX |
0x59788f VPXOR %XMM0,%XMM0,%XMM0 |
0x597893 XOR %ECX,%ECX |
0x597895 MOV $0x1,%EDX |
0x59789a JMP 597301 |
0x59789f MOV %R14,%R9 |
0x5978a2 XOR %R8D,%R8D |
0x5978a5 JMP 5974f6 |
0x5978aa MOV 0x78(%RSP),%RAX |
0x5978af MOV (%R11,%RAX,8),%RDI |
0x5978b3 JMP 5975b1 |
0x5978b8 NOPL (%RAX,%RAX,1) |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
○100.00 | GOMP_parallel | libgomp.h:985 | libgomp.so.1.0.0 |
Path / |
Source file and lines | csr_matop.c:185-302 |
Module | exec |
nb instructions | 399 |
nb uops | 475 |
loop length | 1678 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 10 |
used ymm registers | 6 |
used zmm registers | 9 |
nb stack references | 13 |
micro-operation queue | 119.75 cycles |
front end | 119.75 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | |
---|---|---|---|---|---|---|---|---|
uops | 74.00 | 73.92 | 54.83 | 54.50 | 69.00 | 74.08 | 74.00 | 54.67 |
cycles | 74.00 | 73.92 | 54.83 | 54.50 | 69.00 | 74.08 | 74.00 | 54.67 |
Cycles executing div or sqrt instructions | 24.00-90.00 |
FE+BE cycles | 104.00-151.51 |
Stall cycles | 0.00-47.45 |
Front-end | 119.75 |
Dispatch | 74.08 |
DIV/SQRT | 24.00-90.00 |
Overall L1 | 119.75 |
all | 28% |
load | 61% |
store | 15% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 100% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 16% |
all | 28% |
load | 62% |
store | 24% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 77% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 12% |
other | 15% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
PUSH %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RDI,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
AND $-0x40,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
ADD $-0x80,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV 0x68(%RDI),%RCX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x50(%RDI),%R11 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RDI,0x30(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x38(%RDI),%RDX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x28(%RDI),%RSI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x18(%RDI),%R8 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x60(%RDI),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RCX,0x58(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV (%RBX),%R9 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x30(%RDI),%R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %R11,0x78(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x20(%RDI),%R13 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x10(%RDI),%R15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RDX,0x40(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x8(%RDI),%RDI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RSI,0x70(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %R8,0x38(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RDI,0x68(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %R9,0x60(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RAX,0x48(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
CALL 5b5180 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
MOV %RAX,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CALL 5b5170 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
MOV 0x78(%RSP),%R11 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x58(%RSP),%RCX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %RAX,0x28(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x38(%RSP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
CQTO | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
IDIV %R10 | 57 | 14.25 | 14.25 | 0 | 0 | 0 | 14.25 | 14.25 | 0 | 42-95 | 24-90 |
CMP %R14,%RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JLE 5976f7 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV %R14,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
IMUL %RAX,%RDI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %RDI,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
LEA (%R14,%RDI,1),%R9 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA 0x1(%R14,%RAX,1),%R10 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %R9,0x78(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %R10,0x50(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x40(%RSP),%RDI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV $0x8,%ESI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %R11,0x20(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RCX,0x58(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
CALL 5b1df0 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
CMPQ $0,0x40(%RSP) | 1 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
MOV 0x58(%RSP),%RCX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x20(%RSP),%R11 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JLE 5970ab | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV 0x40(%RSP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV $0xff,%ESI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %RBX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %R11,0x20(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RCX,0x58(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
LEA (,%RAX,8),%RDX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 40f0a0 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
MOV 0x58(%RSP),%RCX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x20(%RSP),%R11 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x78(%RSP),%R8 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x50(%RSP),%RDX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
XOR %EDI,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %R8,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CMP %RDX,%R8 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JGE 597172 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV %RCX,0x20(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %R13,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV 0x70(%RSP),%R10 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %R11,0x58(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x68(%RSP),%R13 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV 0x20(%RSP),%RCX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x58(%RSP),%R11 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %R8,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %RDI,(%RCX,%R14,8) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %R11,0x20(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RCX,0x58(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
CALL 40f290 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
TEST %R14,%R14 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV 0x58(%RSP),%R10 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x20(%RSP),%R11 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
JE 597718 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV (%R10),%R8 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
CMP $0x1,%R14 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JLE 597861 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
LEA -0x2(%R14),%RDI | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA -0x1(%R14),%R9 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP $0x6,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JBE 59788c | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV %R9,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %R10,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VPXOR %XMM2,%XMM2,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
SHR $0x3,%RDI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
SAL $0x6,%RDI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
LEA (%RDI,%R10,1),%RSI | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB $0x40,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
SHR $0x6,%RDI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
INC %RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
AND $0x7,%EDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 59726c | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x1,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 597259 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x2,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 59724b | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x3,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 59723d | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x4,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 59722f | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x5,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 597221 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x6,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 597213 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
VMOVDQU64 0x8(%R10),%ZMM2 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
LEA 0x40(%R10),%RAX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
VPADDQ 0x8(%RAX),%ZMM2,%ZMM2 | 1 | 0.50 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD $0x40,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VPADDQ 0x8(%RAX),%ZMM2,%ZMM2 | 1 | 0.50 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD $0x40,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VPADDQ 0x8(%RAX),%ZMM2,%ZMM2 | 1 | 0.50 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD $0x40,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VPADDQ 0x8(%RAX),%ZMM2,%ZMM2 | 1 | 0.50 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD $0x40,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VPADDQ 0x8(%RAX),%ZMM2,%ZMM2 | 1 | 0.50 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD $0x40,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VPADDQ 0x8(%RAX),%ZMM2,%ZMM2 | 1 | 0.50 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD $0x40,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CMP %RSI,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 5972c7 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
VEXTRACTI64X4 $0x1,%ZMM2,%YMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
MOV %R9,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VPADDQ %YMM2,%YMM9,%YMM0 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 1 | 0.33 |
AND $-0x8,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VEXTRACTI64X2 $0x1,%YMM0,%XMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
LEA 0x1(%RCX),%RDX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
VPADDQ %XMM0,%XMM10,%XMM12 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 1 | 0.33 |
VPSRLDQ $0x8,%XMM12,%XMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VPADDQ %XMM13,%XMM12,%XMM14 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 1 | 0.33 |
VMOVQ %XMM14,%RAX | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
ADD %R8,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
AND $0x7,%R9D | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 597370 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV %R14,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
SUB %RCX,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
LEA -0x1(%RDI),%R9 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB $0x2,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CMP $0x2,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JBE 597348 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
VPADDQ 0x8(%R10,%RCX,8),%YMM0,%YMM15 | 1 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0.33 | 0 | 0 | 1 | 0.50 |
VEXTRACTI64X2 $0x1,%YMM15,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VPADDQ %XMM0,%XMM15,%XMM3 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 1 | 0.33 |
VPSRLDQ $0x8,%XMM3,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VPADDQ %XMM1,%XMM3,%XMM4 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 1 | 0.33 |
VMOVQ %XMM4,%RAX | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
ADD %R8,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %R9,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
AND $-0x4,%R8 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
ADD %R8,%RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
AND $0x3,%R9D | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 597370 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
LEA 0x1(%RDX),%RSI | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (,%RDX,8),%RCX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD (%R10,%RDX,8),%RAX | 1 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
CMP %R14,%RSI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JGE 597370 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
ADD $0x2,%RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
ADD 0x8(%R10,%RCX,1),%RAX | 1 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
CMP %RDX,%R14 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JLE 597370 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
ADD 0x10(%R10,%RCX,1),%RAX | 1 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
MOV 0x78(%RSP),%R14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x50(%RSP),%R10 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
CMP %R10,%R14 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JGE 597869 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
SUB %R14,%R10 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
LEA -0x1(%R10),%RDX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP $0x6,%RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JBE 59789f | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV %R10,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
LEA (%R11,%R14,8),%RSI | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
VPBROADCASTQ %RAX,%ZMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
SHR $0x3,%RCX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
SAL $0x6,%RCX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
LEA (%RCX,%RSI,1),%RDI | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB $0x40,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
SHR $0x6,%RCX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
INC %RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
AND $0x7,%ECX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 597465 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x1,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 59744f | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x2,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 59743e | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x3,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 59742d | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x4,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 59741c | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x5,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 59740b | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x6,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 5973fa | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
VPADDQ (%RSI),%ZMM5,%ZMM6 | 1 | 0.50 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD $0x40,%RSI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVDQU64 %ZMM6,-0x40(%RSI) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 4 | 1 |
VPADDQ (%RSI),%ZMM5,%ZMM7 | 1 | 0.50 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD $0x40,%RSI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVDQU64 %ZMM7,-0x40(%RSI) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 4 | 1 |
VPADDQ (%RSI),%ZMM5,%ZMM8 | 1 | 0.50 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD $0x40,%RSI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVDQU64 %ZMM8,-0x40(%RSI) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 4 | 1 |
VPADDQ (%RSI),%ZMM5,%ZMM9 | 1 | 0.50 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD $0x40,%RSI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVDQU64 %ZMM9,-0x40(%RSI) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 4 | 1 |
VPADDQ (%RSI),%ZMM5,%ZMM10 | 1 | 0.50 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD $0x40,%RSI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVDQU64 %ZMM10,-0x40(%RSI) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 4 | 1 |
VPADDQ (%RSI),%ZMM5,%ZMM11 | 1 | 0.50 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD $0x40,%RSI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVDQU64 %ZMM11,-0x40(%RSI) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 4 | 1 |
VPADDQ (%RSI),%ZMM5,%ZMM12 | 1 | 0.50 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD $0x40,%RSI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVDQU64 %ZMM12,-0x40(%RSI) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 4 | 1 |
CMP %RSI,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 5974e0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV 0x78(%RSP),%R14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %R10,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
AND $-0x8,%R8 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
LEA (%R8,%R14,1),%R9 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
TEST $0x7,%R10B | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 59755c | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
SUB %R8,%R10 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
LEA -0x1(%R10),%RDX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP $0x2,%RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JBE 59752f | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV 0x78(%RSP),%RSI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %R10,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VPBROADCASTQ %RAX,%YMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
AND $-0x4,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
ADD %RSI,%R8 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
ADD %RCX,%R9 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
AND $0x3,%R10D | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
LEA (%R11,%R8,8),%R8 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
VPADDQ (%R8),%YMM5,%YMM6 | 1 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0.33 | 0 | 0 | 1 | 0.50 |
VMOVDQU %YMM6,(%R8) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 4 | 1 |
JE 59755c | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV 0x50(%RSP),%RDI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
LEA (,%R9,8),%R14 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA 0x1(%R9),%R10 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD %RAX,(%R11,%R14,1) | 2 | 0.25 | 0.25 | 0.83 | 0.83 | 1 | 0.25 | 0.25 | 0.33 | 5 | 1 |
CMP %R10,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JLE 59755c | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
ADD $0x2,%R9 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
ADD %RAX,0x8(%R11,%R14,1) | 2 | 0.25 | 0.25 | 0.83 | 0.83 | 1 | 0.25 | 0.25 | 0.33 | 5 | 1 |
CMP %R9,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JLE 59755c | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
ADD %RAX,0x10(%R11,%R14,1) | 2 | 0.25 | 0.25 | 0.83 | 0.83 | 1 | 0.25 | 0.25 | 0.33 | 5 | 1 |
MOV %R11,0x58(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VZEROUPPER | 4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 40f290 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
CMPQ $0,0x40(%RSP) | 1 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
MOV 0x58(%RSP),%R11 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
JLE 5978aa | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV 0x40(%RSP),%RDX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV $0xff,%ESI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %RBX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %R11,0x58(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
SAL $0x3,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
CALL 40f0a0 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
MOV 0x58(%RSP),%R11 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x78(%RSP),%R9 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x50(%RSP),%RDX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV (%R11,%R9,8),%RDI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
CMP %RDX,%R9 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JGE 59769f | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV 0x30(%RSP),%RSI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %R11,0x40(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %R15,0x58(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x48(%RSI),%R10 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x58(%RSI),%R14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RBX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R13 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %RBP | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
JMP 5b1eb0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %R14,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
IMUL %RAX,%RSI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%RDX,%RSI,1),%RBX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD %RAX,%RSI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
LEA (%RSI,%RDX,1),%R8 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RBX,0x78(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %R8,0x50(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
JMP 59704f | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
MOV 0x38(%RSP),%RSI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x28(%RSP),%RDI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %R10,%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
LEA (%R11,%RSI,8),%R8 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (%R10,%RDI,8),%RAX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVQ $0,(%R8) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 2 | 1 |
TEST %RDI,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JLE 597802 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV %RAX,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
SUB %R10,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
SUB $0x8,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
SHR $0x3,%RCX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
INC %RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
AND $0x7,%ECX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 5977c2 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x1,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 5977b3 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x2,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 5977a9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x3,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 59779f | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x4,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 597795 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x5,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 59778b | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x6,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 597781 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV (%R10),%R14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
ADD $0x8,%R9 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %R14,(%R8) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
ADD (%R9),%R14 | 1 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
ADD $0x8,%R9 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %R14,(%R8) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
ADD (%R9),%R14 | 1 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
ADD $0x8,%R9 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %R14,(%R8) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
ADD (%R9),%R14 | 1 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
ADD $0x8,%R9 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %R14,(%R8) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
ADD (%R9),%R14 | 1 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
ADD $0x8,%R9 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %R14,(%R8) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
ADD (%R9),%R14 | 1 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
ADD $0x8,%R9 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %R14,(%R8) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
ADD (%R9),%R14 | 1 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
ADD $0x8,%R9 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %R14,(%R8) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
CMP %RAX,%R9 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 597802 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV 0x40(%RSP),%RSI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x38(%RSP),%RDI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %R14,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %R11,0x58(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
CALL 599ff0 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
MOV 0x30(%RSP),%R14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x58(%RSP),%RDX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %RDX,(%RAX) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RAX,0x40(%R14) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
CALL 59a100 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
MOV 0x40(%R14),%R10 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x8(%R10),%R11 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x30(%R10),%RSI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %R11,0x58(%R14) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RSI,0x48(%R14) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
CALL 40f290 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
CMPQ $0,0x40(%RSP) | 1 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
MOV 0x58(%RSP),%R11 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
JG 59757a | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
JMP 59759a | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
MOV %R8,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 597370 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
MOV %R11,0x58(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VZEROUPPER | 4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 40f290 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
CMPQ $0,0x40(%RSP) | 1 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
MOV 0x58(%RSP),%R11 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
JG 59757a | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
JMP 59769f | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
MOV %R8,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VPXOR %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV $0x1,%EDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JMP 597301 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
MOV %R14,%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XOR %R8D,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 5974f6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
MOV 0x78(%RSP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV (%R11,%RAX,8),%RDI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
JMP 5975b1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
Source file and lines | csr_matop.c:185-302 |
Module | exec |
nb instructions | 399 |
nb uops | 475 |
loop length | 1678 |
used x86 registers | 16 |
used mmx registers | 0 |
used xmm registers | 10 |
used ymm registers | 6 |
used zmm registers | 9 |
nb stack references | 13 |
micro-operation queue | 119.75 cycles |
front end | 119.75 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | |
---|---|---|---|---|---|---|---|---|
uops | 74.00 | 73.92 | 54.83 | 54.50 | 69.00 | 74.08 | 74.00 | 54.67 |
cycles | 74.00 | 73.92 | 54.83 | 54.50 | 69.00 | 74.08 | 74.00 | 54.67 |
Cycles executing div or sqrt instructions | 24.00-90.00 |
FE+BE cycles | 104.00-151.51 |
Stall cycles | 0.00-47.45 |
Front-end | 119.75 |
Dispatch | 74.08 |
DIV/SQRT | 24.00-90.00 |
Overall L1 | 119.75 |
all | 28% |
load | 61% |
store | 15% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 100% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 16% |
all | 28% |
load | 62% |
store | 24% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 77% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 12% |
other | 15% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
PUSH %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
PUSH %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RDI,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
AND $-0x40,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
ADD $-0x80,%RSP | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV 0x68(%RDI),%RCX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x50(%RDI),%R11 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RDI,0x30(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x38(%RDI),%RDX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x28(%RDI),%RSI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x18(%RDI),%R8 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x60(%RDI),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RCX,0x58(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV (%RBX),%R9 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x30(%RDI),%R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %R11,0x78(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x20(%RDI),%R13 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x10(%RDI),%R15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RDX,0x40(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x8(%RDI),%RDI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RSI,0x70(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %R8,0x38(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RDI,0x68(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %R9,0x60(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RAX,0x48(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
CALL 5b5180 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
MOV %RAX,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CALL 5b5170 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
MOV 0x78(%RSP),%R11 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x58(%RSP),%RCX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %RAX,0x28(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x38(%RSP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
CQTO | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
IDIV %R10 | 57 | 14.25 | 14.25 | 0 | 0 | 0 | 14.25 | 14.25 | 0 | 42-95 | 24-90 |
CMP %R14,%RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JLE 5976f7 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV %R14,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
IMUL %RAX,%RDI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %RDI,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
LEA (%R14,%RDI,1),%R9 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA 0x1(%R14,%RAX,1),%R10 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %R9,0x78(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %R10,0x50(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x40(%RSP),%RDI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV $0x8,%ESI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %R11,0x20(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RCX,0x58(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
CALL 5b1df0 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
CMPQ $0,0x40(%RSP) | 1 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
MOV 0x58(%RSP),%RCX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x20(%RSP),%R11 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JLE 5970ab | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV 0x40(%RSP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV $0xff,%ESI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %RBX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %R11,0x20(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RCX,0x58(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
LEA (,%RAX,8),%RDX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 40f0a0 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
MOV 0x58(%RSP),%RCX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x20(%RSP),%R11 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x78(%RSP),%R8 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x50(%RSP),%RDX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
XOR %EDI,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %R8,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
CMP %RDX,%R8 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JGE 597172 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV %RCX,0x20(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %R13,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV 0x70(%RSP),%R10 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %R11,0x58(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x68(%RSP),%R13 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV 0x20(%RSP),%RCX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x58(%RSP),%R11 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %R8,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %RDI,(%RCX,%R14,8) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %R11,0x20(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RCX,0x58(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
CALL 40f290 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
TEST %R14,%R14 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV 0x58(%RSP),%R10 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x20(%RSP),%R11 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
JE 597718 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV (%R10),%R8 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
CMP $0x1,%R14 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JLE 597861 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
LEA -0x2(%R14),%RDI | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA -0x1(%R14),%R9 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP $0x6,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JBE 59788c | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV %R9,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %R10,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VPXOR %XMM2,%XMM2,%XMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
SHR $0x3,%RDI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
SAL $0x6,%RDI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
LEA (%RDI,%R10,1),%RSI | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB $0x40,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
SHR $0x6,%RDI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
INC %RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
AND $0x7,%EDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 59726c | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x1,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 597259 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x2,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 59724b | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x3,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 59723d | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x4,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 59722f | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x5,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 597221 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x6,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 597213 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
VMOVDQU64 0x8(%R10),%ZMM2 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 5 | 0.50 |
LEA 0x40(%R10),%RAX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
VPADDQ 0x8(%RAX),%ZMM2,%ZMM2 | 1 | 0.50 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD $0x40,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VPADDQ 0x8(%RAX),%ZMM2,%ZMM2 | 1 | 0.50 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD $0x40,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VPADDQ 0x8(%RAX),%ZMM2,%ZMM2 | 1 | 0.50 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD $0x40,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VPADDQ 0x8(%RAX),%ZMM2,%ZMM2 | 1 | 0.50 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD $0x40,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VPADDQ 0x8(%RAX),%ZMM2,%ZMM2 | 1 | 0.50 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD $0x40,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VPADDQ 0x8(%RAX),%ZMM2,%ZMM2 | 1 | 0.50 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD $0x40,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CMP %RSI,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 5972c7 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
VEXTRACTI64X4 $0x1,%ZMM2,%YMM9 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
MOV %R9,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VPADDQ %YMM2,%YMM9,%YMM0 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 1 | 0.33 |
AND $-0x8,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VEXTRACTI64X2 $0x1,%YMM0,%XMM10 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
LEA 0x1(%RCX),%RDX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
VPADDQ %XMM0,%XMM10,%XMM12 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 1 | 0.33 |
VPSRLDQ $0x8,%XMM12,%XMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VPADDQ %XMM13,%XMM12,%XMM14 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 1 | 0.33 |
VMOVQ %XMM14,%RAX | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
ADD %R8,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
AND $0x7,%R9D | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 597370 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV %R14,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
SUB %RCX,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
LEA -0x1(%RDI),%R9 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB $0x2,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CMP $0x2,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JBE 597348 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
VPADDQ 0x8(%R10,%RCX,8),%YMM0,%YMM15 | 1 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0.33 | 0 | 0 | 1 | 0.50 |
VEXTRACTI64X2 $0x1,%YMM15,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 1 |
VPADDQ %XMM0,%XMM15,%XMM3 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 1 | 0.33 |
VPSRLDQ $0x8,%XMM3,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
VPADDQ %XMM1,%XMM3,%XMM4 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 1 | 0.33 |
VMOVQ %XMM4,%RAX | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
ADD %R8,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %R9,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
AND $-0x4,%R8 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
ADD %R8,%RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
AND $0x3,%R9D | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 597370 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
LEA 0x1(%RDX),%RSI | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (,%RDX,8),%RCX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD (%R10,%RDX,8),%RAX | 1 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
CMP %R14,%RSI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JGE 597370 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
ADD $0x2,%RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
ADD 0x8(%R10,%RCX,1),%RAX | 1 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
CMP %RDX,%R14 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JLE 597370 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
ADD 0x10(%R10,%RCX,1),%RAX | 1 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
MOV 0x78(%RSP),%R14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x50(%RSP),%R10 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
CMP %R10,%R14 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JGE 597869 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
SUB %R14,%R10 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
LEA -0x1(%R10),%RDX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP $0x6,%RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JBE 59789f | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV %R10,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
LEA (%R11,%R14,8),%RSI | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
VPBROADCASTQ %RAX,%ZMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
SHR $0x3,%RCX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
SAL $0x6,%RCX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
LEA (%RCX,%RSI,1),%RDI | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
SUB $0x40,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
SHR $0x6,%RCX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
INC %RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
AND $0x7,%ECX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 597465 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x1,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 59744f | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x2,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 59743e | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x3,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 59742d | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x4,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 59741c | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x5,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 59740b | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x6,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 5973fa | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
VPADDQ (%RSI),%ZMM5,%ZMM6 | 1 | 0.50 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD $0x40,%RSI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVDQU64 %ZMM6,-0x40(%RSI) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 4 | 1 |
VPADDQ (%RSI),%ZMM5,%ZMM7 | 1 | 0.50 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD $0x40,%RSI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVDQU64 %ZMM7,-0x40(%RSI) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 4 | 1 |
VPADDQ (%RSI),%ZMM5,%ZMM8 | 1 | 0.50 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD $0x40,%RSI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVDQU64 %ZMM8,-0x40(%RSI) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 4 | 1 |
VPADDQ (%RSI),%ZMM5,%ZMM9 | 1 | 0.50 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD $0x40,%RSI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVDQU64 %ZMM9,-0x40(%RSI) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 4 | 1 |
VPADDQ (%RSI),%ZMM5,%ZMM10 | 1 | 0.50 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD $0x40,%RSI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVDQU64 %ZMM10,-0x40(%RSI) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 4 | 1 |
VPADDQ (%RSI),%ZMM5,%ZMM11 | 1 | 0.50 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD $0x40,%RSI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVDQU64 %ZMM11,-0x40(%RSI) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 4 | 1 |
VPADDQ (%RSI),%ZMM5,%ZMM12 | 1 | 0.50 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD $0x40,%RSI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
VMOVDQU64 %ZMM12,-0x40(%RSI) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 4 | 1 |
CMP %RSI,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 5974e0 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV 0x78(%RSP),%R14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %R10,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
AND $-0x8,%R8 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
LEA (%R8,%R14,1),%R9 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
TEST $0x7,%R10B | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 59755c | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
SUB %R8,%R10 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
LEA -0x1(%R10),%RDX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP $0x2,%RDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JBE 59752f | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV 0x78(%RSP),%RSI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %R10,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VPBROADCASTQ %RAX,%YMM5 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 |
AND $-0x4,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
ADD %RSI,%R8 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
ADD %RCX,%R9 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
AND $0x3,%R10D | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
LEA (%R11,%R8,8),%R8 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
VPADDQ (%R8),%YMM5,%YMM6 | 1 | 0.33 | 0.33 | 0.50 | 0.50 | 0 | 0.33 | 0 | 0 | 1 | 0.50 |
VMOVDQU %YMM6,(%R8) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 4 | 1 |
JE 59755c | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV 0x50(%RSP),%RDI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
LEA (,%R9,8),%R14 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA 0x1(%R9),%R10 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD %RAX,(%R11,%R14,1) | 2 | 0.25 | 0.25 | 0.83 | 0.83 | 1 | 0.25 | 0.25 | 0.33 | 5 | 1 |
CMP %R10,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JLE 59755c | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
ADD $0x2,%R9 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
ADD %RAX,0x8(%R11,%R14,1) | 2 | 0.25 | 0.25 | 0.83 | 0.83 | 1 | 0.25 | 0.25 | 0.33 | 5 | 1 |
CMP %R9,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JLE 59755c | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
ADD %RAX,0x10(%R11,%R14,1) | 2 | 0.25 | 0.25 | 0.83 | 0.83 | 1 | 0.25 | 0.25 | 0.33 | 5 | 1 |
MOV %R11,0x58(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VZEROUPPER | 4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 40f290 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
CMPQ $0,0x40(%RSP) | 1 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
MOV 0x58(%RSP),%R11 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
JLE 5978aa | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV 0x40(%RSP),%RDX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV $0xff,%ESI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %RBX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %R11,0x58(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
SAL $0x3,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
CALL 40f0a0 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
MOV 0x58(%RSP),%R11 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x78(%RSP),%R9 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x50(%RSP),%RDX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV (%R11,%R9,8),%RDI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
CMP %RDX,%R9 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JGE 59769f | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV 0x30(%RSP),%RSI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %R11,0x40(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %R15,0x58(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV 0x48(%RSI),%R10 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x58(%RSI),%R14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
LEA -0x28(%RBP),%RSP | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RBX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
POP %RBX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R12 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R13 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %R15 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
POP %RBP | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
JMP 5b1eb0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %R14,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
IMUL %RAX,%RSI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%RDX,%RSI,1),%RBX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
ADD %RAX,%RSI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
LEA (%RSI,%RDX,1),%R8 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RBX,0x78(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %R8,0x50(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
JMP 59704f | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
MOV 0x38(%RSP),%RSI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x28(%RSP),%RDI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %R10,%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
LEA (%R11,%RSI,8),%R8 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
LEA (%R10,%RDI,8),%RAX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVQ $0,(%R8) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 2 | 1 |
TEST %RDI,%RDI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JLE 597802 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV %RAX,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
SUB %R10,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
SUB $0x8,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
SHR $0x3,%RCX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
INC %RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
AND $0x7,%ECX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 5977c2 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x1,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 5977b3 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x2,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 5977a9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x3,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 59779f | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x4,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 597795 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x5,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 59778b | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x6,%RCX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 597781 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV (%R10),%R14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
ADD $0x8,%R9 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %R14,(%R8) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
ADD (%R9),%R14 | 1 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
ADD $0x8,%R9 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %R14,(%R8) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
ADD (%R9),%R14 | 1 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
ADD $0x8,%R9 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %R14,(%R8) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
ADD (%R9),%R14 | 1 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
ADD $0x8,%R9 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %R14,(%R8) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
ADD (%R9),%R14 | 1 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
ADD $0x8,%R9 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %R14,(%R8) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
ADD (%R9),%R14 | 1 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
ADD $0x8,%R9 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %R14,(%R8) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
ADD (%R9),%R14 | 1 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
ADD $0x8,%R9 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
MOV %R14,(%R8) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
CMP %RAX,%R9 | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JE 597802 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
MOV 0x40(%RSP),%RSI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x38(%RSP),%RDI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %R14,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %R11,0x58(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
CALL 599ff0 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
MOV 0x30(%RSP),%R14 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x58(%RSP),%RDX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %RAX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %RDX,(%RAX) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RAX,0x40(%R14) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
CALL 59a100 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
MOV 0x40(%R14),%R10 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x8(%R10),%R11 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV 0x30(%R10),%RSI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV %R11,0x58(%R14) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RSI,0x48(%R14) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
CALL 40f290 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
CMPQ $0,0x40(%RSP) | 1 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
MOV 0x58(%RSP),%R11 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
JG 59757a | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
JMP 59759a | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
MOV %R8,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 597370 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
MOV %R11,0x58(%RSP) | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
VZEROUPPER | 4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 40f290 | 2 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 1 | 0.33 | 0 | 1 |
CMPQ $0,0x40(%RSP) | 1 | 0.25 | 0.25 | 0.50 | 0.50 | 0 | 0.25 | 0.25 | 0 | 1 | 0.50 |
MOV 0x58(%RSP),%R11 | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
JG 59757a | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
JMP 59769f | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
MOV %R8,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
VPXOR %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV $0x1,%EDX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JMP 597301 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
MOV %R14,%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
XOR %R8D,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 5974f6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
MOV 0x78(%RSP),%RAX | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
MOV (%R11,%RAX,8),%RDI | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 4-5 | 0.50 |
JMP 5975b1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼hypre_CSRMatrixMultiply._omp_fn.0– | 1.36 | 0.5 |
▼Loop 2991 - csr_matop.c:272-298 - exec– | 0.01 | 0.01 |
▼Loop 2992 - csr_matop.c:282-298 - exec– | 0.09 | 0.04 |
○Loop 2993 - csr_matop.c:286-298 - exec | 0.68 | 0.25 |
○Loop 2996 - csr_matop.c:245-246 - exec | 0 | 0 |
○Loop 2994 - csr_matop.c:254-255 - exec | 0 | 0 |
▼Loop 2997 - csr_matop.c:214-231 - exec– | 0 | 0 |
▼Loop 2998 - csr_matop.c:222-231 - exec– | 0.19 | 0.07 |
○Loop 2999 - csr_matop.c:225-231 - exec | 0.38 | 0.14 |
○Loop 2995 - csr_matop.c:248-249 - exec | 0 | 0 |