Function: hypre_IJMatrixSetValuesOMPParCSR.extracted.28 | Module: exec | Source: IJMatrix_parcsr.c:3240-3484 [...] | Coverage: 0.38% |
---|
Function: hypre_IJMatrixSetValuesOMPParCSR.extracted.28 | Module: exec | Source: IJMatrix_parcsr.c:3240-3484 [...] | Coverage: 0.38% |
---|
/scratch_na/users/xoserete/qaas_runs/171-415-3872/intel/AMG/build/AMG/AMG/IJ_mv/IJMatrix_parcsr.c: 3240 - 3484 |
-------------------------------------------------------------------------------- |
3240: #pragma omp parallel |
[...] |
3256: num_threads = hypre_NumActiveThreads(); |
3257: my_thread_num = hypre_GetThreadNum(); |
3258: |
3259: len = nrows/num_threads; |
3260: rest = nrows - len*num_threads; |
3261: |
3262: if (my_thread_num < rest) |
3263: { |
3264: ns = my_thread_num*(len+1); |
3265: ne = (my_thread_num+1)*(len+1); |
3266: } |
3267: else |
3268: { |
3269: ns = my_thread_num*len+rest; |
3270: ne = (my_thread_num+1)*len+rest; |
3271: } |
3272: |
3273: value_start[my_thread_num] = 0; |
3274: for (ii=ns; ii < ne; ii++) |
3275: value_start[my_thread_num] += ncols[ii]; |
3276: |
3277: #ifdef HYPRE_USING_OPENMP |
3278: #pragma omp barrier |
3279: #endif |
3280: if (my_thread_num == 0) |
3281: { |
3282: for (i=0; i < max_num_threads; i++) |
3283: value_start[i+1] += value_start[i]; |
[...] |
3289: if (my_thread_num) |
3290: indx = value_start[my_thread_num-1]; |
3291: for (ii=ns; ii < ne; ii++) |
3292: { |
3293: row = rows[ii]; |
3294: n = ncols[ii]; |
3295: /* processor owns the row */ |
3296: if (row >= row_partitioning[pstart] && row < row_partitioning[pstart+1]) |
3297: { |
3298: row_local = row - row_partitioning[pstart]; |
3299: /* compute local row number */ |
3300: if (need_aux) |
3301: { |
3302: local_j = aux_j[row_local]; |
3303: local_data = aux_data[row_local]; |
3304: space = row_space[row_local]; |
3305: old_size = row_length[row_local]; |
3306: size = space - old_size; |
3307: if (size < n) |
3308: { |
3309: size = n - size; |
3310: tmp_j = hypre_CTAlloc(HYPRE_Int,size); |
3311: tmp_data = hypre_CTAlloc(HYPRE_Complex,size); |
3312: } |
3313: tmp_indx = 0; |
3314: not_found = 1; |
3315: size = old_size; |
3316: for (i=0; i < n; i++) |
3317: { |
3318: for (j=0; j < old_size; j++) |
3319: { |
3320: if (local_j[j] == cols[indx]) |
3321: { |
3322: local_data[j] = values[indx]; |
[...] |
3329: if (size < space) |
3330: { |
3331: local_j[size] = cols[indx]; |
3332: local_data[size++] = values[indx]; |
3333: } |
3334: else |
3335: { |
3336: tmp_j[tmp_indx] = cols[indx]; |
3337: tmp_data[tmp_indx++] = values[indx]; |
[...] |
3344: row_length[row_local] = size+tmp_indx; |
3345: |
3346: if (tmp_indx) |
3347: { |
3348: aux_j[row_local] = hypre_TReAlloc(aux_j[row_local],HYPRE_Int, |
3349: size+tmp_indx); |
3350: aux_data[row_local] = hypre_TReAlloc(aux_data[row_local], |
3351: HYPRE_Complex,size+tmp_indx); |
3352: row_space[row_local] = size+tmp_indx; |
3353: local_j = aux_j[row_local]; |
[...] |
3359: for (i=0; i < tmp_indx; i++) |
3360: { |
3361: local_j[cnt] = tmp_j[i]; |
3362: local_data[cnt++] = tmp_data[i]; |
3363: } |
3364: |
3365: if (tmp_j) |
3366: { |
3367: hypre_TFree(tmp_j); |
3368: hypre_TFree(tmp_data); |
[...] |
3376: offd_indx = hypre_AuxParCSRMatrixIndxOffd(aux_matrix)[row_local]; |
3377: diag_indx = hypre_AuxParCSRMatrixIndxDiag(aux_matrix)[row_local]; |
3378: cnt_diag = diag_indx; |
3379: cnt_offd = offd_indx; |
3380: diag_space = diag_i[row_local+1]; |
3381: offd_space = offd_i[row_local+1]; |
3382: not_found = 1; |
3383: for (i=0; i < n; i++) |
3384: { |
3385: if (cols[indx] < col_0 || cols[indx] > col_n) |
3386: /* insert into offd */ |
3387: { |
3388: for (j=offd_i[row_local]; j < offd_indx; j++) |
3389: { |
3390: if (offd_j[j] == cols[indx]) |
3391: { |
3392: offd_data[j] = values[indx]; |
[...] |
3399: if (cnt_offd < offd_space) |
3400: { |
3401: offd_j[cnt_offd] = cols[indx]; |
3402: offd_data[cnt_offd++] = values[indx]; |
3403: } |
3404: else |
3405: { |
3406: hypre_error(HYPRE_ERROR_GENERIC); |
3407: #ifdef HYPRE_USING_OPENMP |
3408: #pragma omp atomic |
3409: #endif |
3410: error_flag++; |
[...] |
3422: for (j=diag_i[row_local]; j < diag_indx; j++) |
3423: { |
3424: if (diag_j[j] == cols[indx]) |
3425: { |
3426: diag_data[j] = values[indx]; |
[...] |
3433: if (cnt_diag < diag_space) |
3434: { |
3435: diag_j[cnt_diag] = cols[indx]; |
3436: diag_data[cnt_diag++] = values[indx]; |
3437: } |
3438: else |
3439: { |
3440: hypre_error(HYPRE_ERROR_GENERIC); |
3441: #ifdef HYPRE_USING_OPENMP |
3442: #pragma omp atomic |
3443: #endif |
3444: error_flag++; |
3445: if (print_level) |
[...] |
3454: indx++; |
3455: } |
3456: |
3457: hypre_AuxParCSRMatrixIndxDiag(aux_matrix)[row_local] = cnt_diag; |
3458: hypre_AuxParCSRMatrixIndxOffd(aux_matrix)[row_local] = cnt_offd; |
[...] |
3466: indx += n; |
3467: if (aux_matrix) |
3468: { |
3469: col_indx = 0; |
3470: for (i=0; i < off_proc_i_indx; i=i+2) |
3471: { |
3472: row_len = off_proc_i[i+1]; |
3473: if (off_proc_i[i] == row) |
3474: { |
3475: for (j=0; j < n; j++) |
3476: { |
3477: cnt1 = col_indx; |
3478: for (k=0; k < row_len; k++) |
3479: { |
3480: if (off_proc_j[cnt1] == cols[j]) |
3481: { |
3482: off_proc_j[cnt1++] = -1; |
3483: /*cancel_indx++;*/ |
3484: offproc_cnt[my_thread_num]++; |
0x4d3270 PUSH %RBP |
0x4d3271 MOV %RSP,%RBP |
0x4d3274 PUSH %R15 |
0x4d3276 PUSH %R14 |
0x4d3278 PUSH %R13 |
0x4d327a PUSH %R12 |
0x4d327c PUSH %RBX |
0x4d327d SUB $0x98,%RSP |
0x4d3284 MOV %R9,%RBX |
0x4d3287 MOV %R8,-0x50(%RBP) |
0x4d328b MOV %RCX,-0x48(%RBP) |
0x4d328f MOV %RDX,%R15 |
0x4d3292 MOV %RDI,-0xa0(%RBP) |
0x4d3299 CALL 4e86b0 <hypre_NumActiveThreads> |
0x4d329e MOV %RAX,%R13 |
0x4d32a1 CALL 4e86c0 <hypre_GetThreadNum> |
0x4d32a6 MOV %RAX,%R14 |
0x4d32a9 MOV %R13,%RAX |
0x4d32ac OR %R15,%RAX |
0x4d32af SHR $0x20,%RAX |
0x4d32b3 JE 4d32f1 |
0x4d32b5 MOV %R15,%RAX |
0x4d32b8 CQTO |
0x4d32ba IDIV %R13 |
0x4d32bd MOV 0xb8(%RBP),%R15 |
0x4d32c4 LEA 0x1(%R14),%RCX |
0x4d32c8 CMP %RDX,%R14 |
0x4d32cb JGE 4d3309 |
0x4d32cd LEA 0x1(%RAX),%R13 |
0x4d32d1 MOV %R13,%RSI |
0x4d32d4 IMUL %R14,%RSI |
0x4d32d8 IMUL %RCX,%R13 |
0x4d32dc MOVQ $0,(%R15,%R14,8) |
0x4d32e4 MOV %RSI,%R12 |
0x4d32e7 CMP %R13,%RSI |
0x4d32ea JL 4d3331 |
0x4d32ec JMP 4d33c0 |
0x4d32f1 MOV %R15D,%EAX |
0x4d32f4 XOR %EDX,%EDX |
0x4d32f6 DIV %R13D |
0x4d32f9 MOV 0xb8(%RBP),%R15 |
0x4d3300 LEA 0x1(%R14),%RCX |
0x4d3304 CMP %RDX,%R14 |
0x4d3307 JL 4d32cd |
0x4d3309 MOV %RAX,%RSI |
0x4d330c IMUL %R14,%RSI |
0x4d3310 ADD %RDX,%RSI |
0x4d3313 IMUL %RAX,%RCX |
0x4d3317 ADD %RDX,%RCX |
0x4d331a MOV %RCX,%R13 |
0x4d331d MOVQ $0,(%R15,%R14,8) |
0x4d3325 MOV %RSI,%R12 |
0x4d3328 CMP %R13,%RSI |
0x4d332b JGE 4d33c0 |
0x4d3331 CMP %RDX,%R14 |
0x4d3334 CMOVL %R14,%RDX |
0x4d3338 IMUL %R14,%RAX |
0x4d333c MOV %R13,%RDI |
0x4d333f SUB %RDX,%RDI |
0x4d3342 SUB %RAX,%RDI |
0x4d3345 MOV %RDI,%RCX |
0x4d3348 AND $-0x4,%RCX |
0x4d334c MOV -0x48(%RBP),%R8 |
0x4d3350 JE 4d33a2 |
0x4d3352 LEA -0x1(%RCX),%RSI |
0x4d3356 LEA (%R8,%R12,8),%R8 |
0x4d335a VPXOR %XMM0,%XMM0,%XMM0 |
0x4d335e XOR %R9D,%R9D |
0x4d3361 NOPW %CS:(%RAX,%RAX,1) |
(4058) 0x4d3370 VPADDQ (%R8,%R9,8),%YMM0,%YMM0 |
(4058) 0x4d3376 ADD $0x4,%R9 |
(4058) 0x4d337a CMP %RSI,%R9 |
(4058) 0x4d337d JBE 4d3370 |
0x4d337f VEXTRACTI128 $0x1,%YMM0,%XMM1 |
0x4d3385 VPADDQ %XMM1,%XMM0,%XMM0 |
0x4d3389 VPSHUFD $-0x12,%XMM0,%XMM1 |
0x4d338e VPADDQ %XMM1,%XMM0,%XMM0 |
0x4d3392 VMOVQ %XMM0,%RSI |
0x4d3397 CMP %RCX,%RDI |
0x4d339a MOV -0x48(%RBP),%R8 |
0x4d339e JNE 4d33a6 |
0x4d33a0 JMP 4d33bc |
0x4d33a2 XOR %ECX,%ECX |
0x4d33a4 XOR %ESI,%ESI |
0x4d33a6 ADD %RDX,%RCX |
0x4d33a9 ADD %RAX,%RCX |
0x4d33ac NOPL (%RAX) |
(4034) 0x4d33b0 ADD (%R8,%RCX,8),%RSI |
(4034) 0x4d33b4 INC %RCX |
(4034) 0x4d33b7 CMP %RCX,%R13 |
(4034) 0x4d33ba JNE 4d33b0 |
0x4d33bc MOV %RSI,(%R15,%R14,8) |
0x4d33c0 MOV -0xa0(%RBP),%RAX |
0x4d33c7 MOV (%RAX),%ESI |
0x4d33c9 MOV $0x736530,%EDI |
0x4d33ce VZEROUPPER |
0x4d33d1 CALL 410030 <__kmpc_barrier@plt> |
0x4d33d6 TEST %R14,%R14 |
0x4d33d9 JNE 4d3484 |
0x4d33df MOV 0xc8(%RBP),%RAX |
0x4d33e6 TEST %RAX,%RAX |
0x4d33e9 JLE 4d3484 |
0x4d33ef CMP $0x8,%RAX |
0x4d33f3 JB 4d3457 |
0x4d33f5 MOV %RAX,%RCX |
0x4d33f8 SHR $0x3,%RCX |
0x4d33fc MOV (%R15),%RDX |
0x4d33ff LEA 0x40(%R15),%RSI |
0x4d3403 NOPW %CS:(%RAX,%RAX,1) |
(4057) 0x4d3410 ADD -0x38(%RSI),%RDX |
(4057) 0x4d3414 MOV %RDX,-0x38(%RSI) |
(4057) 0x4d3418 ADD -0x30(%RSI),%RDX |
(4057) 0x4d341c MOV %RDX,-0x30(%RSI) |
(4057) 0x4d3420 ADD -0x28(%RSI),%RDX |
(4057) 0x4d3424 MOV %RDX,-0x28(%RSI) |
(4057) 0x4d3428 ADD -0x20(%RSI),%RDX |
(4057) 0x4d342c MOV %RDX,-0x20(%RSI) |
(4057) 0x4d3430 ADD -0x18(%RSI),%RDX |
(4057) 0x4d3434 MOV %RDX,-0x18(%RSI) |
(4057) 0x4d3438 ADD -0x10(%RSI),%RDX |
(4057) 0x4d343c MOV %RDX,-0x10(%RSI) |
(4057) 0x4d3440 ADD -0x8(%RSI),%RDX |
(4057) 0x4d3444 MOV %RDX,-0x8(%RSI) |
(4057) 0x4d3448 ADD (%RSI),%RDX |
(4057) 0x4d344b MOV %RDX,(%RSI) |
(4057) 0x4d344e ADD $0x40,%RSI |
(4057) 0x4d3452 DEC %RCX |
(4057) 0x4d3455 JNE 4d3410 |
0x4d3457 MOV %RAX,%RCX |
0x4d345a AND $-0x8,%RCX |
0x4d345e CMP %RAX,%RCX |
0x4d3461 JE 4d3484 |
0x4d3463 MOV (%R15,%RCX,8),%RDX |
0x4d3467 NOPW (%RAX,%RAX,1) |
(4056) 0x4d3470 LEA (%R15,%RCX,8),%RSI |
(4056) 0x4d3474 INC %RCX |
(4056) 0x4d3477 ADD 0x8(%RSI),%RDX |
(4056) 0x4d347b MOV %RDX,0x8(%RSI) |
(4056) 0x4d347f CMP %RCX,%RAX |
(4056) 0x4d3482 JNE 4d3470 |
0x4d3484 MOV -0xa0(%RBP),%RAX |
0x4d348b MOV (%RAX),%ESI |
0x4d348d MOV $0x736550,%EDI |
0x4d3492 CALL 410030 <__kmpc_barrier@plt> |
0x4d3497 TEST %R14,%R14 |
0x4d349a JE 4d34a3 |
0x4d349c MOV -0x8(%R15,%R14,8),%RCX |
0x4d34a1 JMP 4d34a5 |
0x4d34a3 XOR %ECX,%ECX |
0x4d34a5 MOV -0x50(%RBP),%RDI |
0x4d34a9 MOV -0x48(%RBP),%RSI |
0x4d34ad MOV %R12,%RDX |
0x4d34b0 CMP %R13,%R12 |
0x4d34b3 JGE 4d401e |
0x4d34b9 MOV 0xb0(%RBP),%RAX |
0x4d34c0 MOV 0x98(%RBP),%R9 |
0x4d34c7 MOV 0x18(%RBP),%R8 |
0x4d34cb TEST %R8,%R8 |
0x4d34ce MOV %RCX,%R8 |
0x4d34d1 SETE %CL |
0x4d34d4 LEA (%RAX,%R14,8),%R15 |
0x4d34d8 TEST %R9,%R9 |
0x4d34db SETLE %AL |
0x4d34de OR %CL,%AL |
0x4d34e0 MOV %AL,-0x31(%RBP) |
0x4d34e3 MOV %R8,%RCX |
0x4d34e6 DEC %R9 |
0x4d34e9 SHR $0x1,%R9 |
0x4d34ec MOV %R9,-0xa0(%RBP) |
0x4d34f3 VPCMPEQD %YMM3,%YMM3,%YMM3 |
0x4d34f7 MOV 0x20(%RBP),%RAX |
0x4d34fb XOR %R8D,%R8D |
0x4d34fe MOV %R8,-0x88(%RBP) |
0x4d3505 MOV %R13,-0x80(%RBP) |
0x4d3509 JMP 4d3520 |
0x4d350b NOPL (%RAX,%RAX,1) |
(4035) 0x4d3510 MOV -0x80(%RBP),%R13 |
(4035) 0x4d3514 INC %RDX |
(4035) 0x4d3517 CMP %R13,%RDX |
(4035) 0x4d351a JGE 4d401e |
(4035) 0x4d3520 MOV (%RDI,%RDX,8),%R13 |
(4035) 0x4d3524 MOV (%RSI,%RDX,8),%R14 |
(4035) 0x4d3528 MOV %R13,%R10 |
(4035) 0x4d352b SUB (%RAX),%R10 |
(4035) 0x4d352e JL 4d35e0 |
(4035) 0x4d3534 CMP 0x8(%RAX),%R13 |
(4035) 0x4d3538 JGE 4d35e0 |
(4035) 0x4d353e CMPQ $0,0x58(%RBP) |
(4035) 0x4d3543 MOV %R10,-0x40(%RBP) |
(4035) 0x4d3547 JE 4d378d |
(4035) 0x4d354d MOV %RCX,-0x30(%RBP) |
(4035) 0x4d3551 MOV 0x38(%RBP),%RAX |
(4035) 0x4d3555 MOV (%RAX,%R10,8),%R12 |
(4035) 0x4d3559 MOV 0x40(%RBP),%RAX |
(4035) 0x4d355d MOV (%RAX,%R10,8),%RAX |
(4035) 0x4d3561 MOV %RAX,-0x68(%RBP) |
(4035) 0x4d3565 MOV 0x50(%RBP),%RAX |
(4035) 0x4d3569 MOV (%RAX,%R10,8),%RCX |
(4035) 0x4d356d MOV 0x48(%RBP),%RAX |
(4035) 0x4d3571 MOV (%RAX,%R10,8),%R13 |
(4035) 0x4d3575 MOV %RCX,-0x78(%RBP) |
(4035) 0x4d3579 MOV %RCX,%RAX |
(4035) 0x4d357c SUB %R13,%RAX |
(4035) 0x4d357f MOV %R14,%RDI |
(4035) 0x4d3582 SUB %RAX,%RDI |
(4035) 0x4d3585 MOV %RDX,-0x58(%RBP) |
(4035) 0x4d3589 JLE 4d37fb |
(4035) 0x4d358f MOV $0x8,%ESI |
(4035) 0x4d3594 MOV %RDI,-0x70(%RBP) |
(4035) 0x4d3598 VZEROUPPER |
(4035) 0x4d359b CALL 4e6980 <hypre_CAlloc> |
(4035) 0x4d35a0 MOV %RAX,-0x60(%RBP) |
(4035) 0x4d35a4 MOV $0x8,%ESI |
(4035) 0x4d35a9 MOV -0x70(%RBP),%RDI |
(4035) 0x4d35ad CALL 4e6980 <hypre_CAlloc> |
(4035) 0x4d35b2 MOV -0x40(%RBP),%R10 |
(4035) 0x4d35b6 MOV %RAX,-0x88(%RBP) |
(4035) 0x4d35bd TEST %R14,%R14 |
(4035) 0x4d35c0 JG 4d380a |
(4035) 0x4d35c6 MOV 0x48(%RBP),%RAX |
(4035) 0x4d35ca MOV %R13,(%RAX,%R10,8) |
(4035) 0x4d35ce MOV -0x80(%RBP),%R13 |
(4035) 0x4d35d2 JMP 4d3afd |
0x4d35d7 NOPW (%RAX,%RAX,1) |
(4035) 0x4d35e0 ADD %R14,%RCX |
(4035) 0x4d35e3 CMPB $0,-0x31(%RBP) |
(4035) 0x4d35e7 JNE 4d3510 |
(4035) 0x4d35ed TEST %R14,%R14 |
(4035) 0x4d35f0 JLE 4d3510 |
(4035) 0x4d35f6 MOV %RCX,-0x30(%RBP) |
(4035) 0x4d35fa MOV %RDX,-0x58(%RBP) |
(4035) 0x4d35fe DEC %R14 |
(4035) 0x4d3601 XOR %ECX,%ECX |
(4035) 0x4d3603 XOR %EAX,%EAX |
(4035) 0x4d3605 JMP 4d3624 |
0x4d3607 NOPW (%RAX,%RAX,1) |
(4036) 0x4d3610 LEA 0x1(%RAX),%RDX |
(4036) 0x4d3614 CMP -0xa0(%RBP),%RAX |
(4036) 0x4d361b MOV %RDX,%RAX |
(4036) 0x4d361e JE 4d3770 |
(4036) 0x4d3624 MOV %RCX,%RSI |
(4036) 0x4d3627 MOV %RAX,%RDI |
(4036) 0x4d362a SAL $0x4,%RDI |
(4036) 0x4d362e MOV 0xa0(%RBP),%R8 |
(4036) 0x4d3635 MOV 0x8(%R8,%RDI,1),%RDX |
(4036) 0x4d363a ADD %RDX,%RCX |
(4036) 0x4d363d CMP %R13,(%R8,%RDI,1) |
(4036) 0x4d3641 JNE 4d3610 |
(4036) 0x4d3643 TEST %RDX,%RDX |
(4036) 0x4d3646 JLE 4d3610 |
(4036) 0x4d3648 MOV 0xa8(%RBP),%R8 |
(4036) 0x4d364f LEA -0x8(%R8,%RCX,8),%RDI |
(4036) 0x4d3654 LEA (%R8,%RSI,8),%RSI |
(4036) 0x4d3658 CMP %R15,%RDI |
(4036) 0x4d365b JB 4d36b0 |
(4036) 0x4d365d CMP %RSI,%R15 |
(4036) 0x4d3660 JB 4d36b0 |
(4036) 0x4d3662 XOR %EDI,%EDI |
(4036) 0x4d3664 JMP 4d367c |
0x4d3666 NOPW %CS:(%RAX,%RAX,1) |
(4040) 0x4d3670 LEA 0x1(%RDI),%R8 |
(4040) 0x4d3674 CMP %R14,%RDI |
(4040) 0x4d3677 MOV %R8,%RDI |
(4040) 0x4d367a JE 4d3610 |
(4040) 0x4d367c MOV (%RBX,%RDI,8),%R8 |
(4040) 0x4d3680 XOR %R9D,%R9D |
(4040) 0x4d3683 JMP 4d3698 |
0x4d3685 NOPW %CS:(%RAX,%RAX,1) |
(4041) 0x4d3690 INC %R9 |
(4041) 0x4d3693 CMP %R9,%RDX |
(4041) 0x4d3696 JE 4d3670 |
(4041) 0x4d3698 CMP %R8,(%RSI,%R9,8) |
(4041) 0x4d369c JNE 4d3690 |
(4041) 0x4d369e MOVQ $-0x1,(%RSI,%R9,8) |
(4041) 0x4d36a6 INCQ (%R15) |
(4041) 0x4d36a9 JMP 4d3690 |
0x4d36ab NOPL (%RAX,%RAX,1) |
(4036) 0x4d36b0 MOV %RDX,%RDI |
(4036) 0x4d36b3 AND $-0x4,%RDI |
(4036) 0x4d36b7 LEA -0x1(%RDI),%R8 |
(4036) 0x4d36bb XOR %R9D,%R9D |
(4036) 0x4d36be JMP 4d36d0 |
(4037) 0x4d36c0 LEA 0x1(%R9),%R10 |
(4037) 0x4d36c4 CMP %R14,%R9 |
(4037) 0x4d36c7 MOV %R10,%R9 |
(4037) 0x4d36ca JE 4d3610 |
(4037) 0x4d36d0 MOV (%RBX,%R9,8),%R10 |
(4037) 0x4d36d4 TEST %RDI,%RDI |
(4037) 0x4d36d7 JE 4d3740 |
(4037) 0x4d36d9 VPBROADCASTQ %R10,%YMM1 |
(4037) 0x4d36df VPXOR %XMM0,%XMM0,%XMM0 |
(4037) 0x4d36e3 XOR %R11D,%R11D |
(4037) 0x4d36e6 NOPW %CS:(%RAX,%RAX,1) |
(4039) 0x4d36f0 VMOVDQU (%RSI,%R11,8),%YMM2 |
(4039) 0x4d36f6 VPCMPEQQ %YMM1,%YMM2,%K1 |
(4039) 0x4d36fc VMOVDQU64 %YMM3,(%RSI,%R11,8){%K1} |
(4039) 0x4d3703 VPCMPEQQ %YMM1,%YMM2,%YMM2 |
(4039) 0x4d3708 VPSUBQ %YMM2,%YMM0,%YMM0 |
(4039) 0x4d370c ADD $0x4,%R11 |
(4039) 0x4d3710 CMP %R8,%R11 |
(4039) 0x4d3713 JLE 4d36f0 |
(4037) 0x4d3715 VEXTRACTI128 $0x1,%YMM0,%XMM1 |
(4037) 0x4d371b VPADDQ %XMM1,%XMM0,%XMM0 |
(4037) 0x4d371f VPSHUFD $-0x12,%XMM0,%XMM1 |
(4037) 0x4d3724 VPADDQ %XMM1,%XMM0,%XMM0 |
(4037) 0x4d3728 VMOVQ %XMM0,%R11 |
(4037) 0x4d372d MOV %RDI,%R12 |
(4037) 0x4d3730 CMP %RDI,%RDX |
(4037) 0x4d3733 JNE 4d3758 |
(4037) 0x4d3735 TEST %R11,%R11 |
(4037) 0x4d3738 JE 4d36c0 |
(4037) 0x4d373a ADD %R11,(%R15) |
(4037) 0x4d373d JMP 4d36c0 |
0x4d373f NOP |
(4037) 0x4d3740 XOR %R11D,%R11D |
(4037) 0x4d3743 XOR %R12D,%R12D |
(4037) 0x4d3746 JMP 4d3758 |
0x4d3748 NOPL (%RAX,%RAX,1) |
(4038) 0x4d3750 INC %R12 |
(4038) 0x4d3753 CMP %R12,%RDX |
(4038) 0x4d3756 JE 4d3735 |
(4038) 0x4d3758 CMP %R10,(%RSI,%R12,8) |
(4038) 0x4d375c JNE 4d3750 |
(4038) 0x4d375e MOVQ $-0x1,(%RSI,%R12,8) |
(4038) 0x4d3766 INC %R11 |
(4038) 0x4d3769 JMP 4d3750 |
0x4d376b NOPL (%RAX,%RAX,1) |
(4035) 0x4d3770 MOV -0x50(%RBP),%RDI |
(4035) 0x4d3774 MOV -0x48(%RBP),%RSI |
(4035) 0x4d3778 MOV -0x58(%RBP),%RDX |
(4035) 0x4d377c MOV -0x80(%RBP),%R13 |
(4035) 0x4d3780 MOV -0x30(%RBP),%RCX |
(4035) 0x4d3784 MOV 0x20(%RBP),%RAX |
(4035) 0x4d3788 JMP 4d3514 |
(4035) 0x4d378d MOV %RCX,%R8 |
(4035) 0x4d3790 MOV 0x18(%RBP),%RCX |
(4035) 0x4d3794 MOV 0x38(%RCX),%RAX |
(4035) 0x4d3798 MOV 0x40(%RCX),%RCX |
(4035) 0x4d379c MOV (%RCX,%R10,8),%R9 |
(4035) 0x4d37a0 MOV (%RAX,%R10,8),%R11 |
(4035) 0x4d37a4 TEST %R14,%R14 |
(4035) 0x4d37a7 JLE 4d3b29 |
(4035) 0x4d37ad MOV %RDX,-0x58(%RBP) |
(4035) 0x4d37b1 MOV 0x60(%RBP),%RAX |
(4035) 0x4d37b5 MOV 0x8(%RAX,%R10,8),%RAX |
(4035) 0x4d37ba MOV %RAX,-0xa8(%RBP) |
(4035) 0x4d37c1 MOV 0x78(%RBP),%RAX |
(4035) 0x4d37c5 MOV 0x8(%RAX,%R10,8),%RAX |
(4035) 0x4d37ca MOV %RAX,-0x60(%RBP) |
(4035) 0x4d37ce MOV %R8,%RCX |
(4035) 0x4d37d1 LEA (%R8,%R14,1),%RAX |
(4035) 0x4d37d5 MOV %RAX,-0x98(%RBP) |
(4035) 0x4d37dc XOR %R8D,%R8D |
(4035) 0x4d37df MOV %R9,-0x70(%RBP) |
(4035) 0x4d37e3 MOV %R9,-0x78(%RBP) |
(4035) 0x4d37e7 MOV %R11,-0x90(%RBP) |
(4035) 0x4d37ee MOV %R11,-0x68(%RBP) |
(4035) 0x4d37f2 MOV 0x68(%RBP),%RDI |
(4035) 0x4d37f6 JMP 4d3b85 |
(4035) 0x4d37fb XOR %EAX,%EAX |
(4035) 0x4d37fd MOV %RAX,-0x60(%RBP) |
(4035) 0x4d3801 TEST %R14,%R14 |
(4035) 0x4d3804 JLE 4d35c6 |
(4035) 0x4d380a LEA -0x1(%R14),%RAX |
(4035) 0x4d380e MOV %R12D,%ECX |
(4035) 0x4d3811 AND $0x7f,%ECX |
(4035) 0x4d3814 MOV $0x80,%ESI |
(4035) 0x4d3819 SUB %ECX,%ESI |
(4035) 0x4d381b SHR $0x3,%ESI |
(4035) 0x4d381e CMP %RSI,%R13 |
(4035) 0x4d3821 MOV %RSI,-0xa8(%RBP) |
(4035) 0x4d3828 CMOVB %R13,%RSI |
(4035) 0x4d382c MOV %R13,%RDX |
(4035) 0x4d382f SUB %RSI,%RDX |
(4035) 0x4d3832 MOV %RDX,-0xb8(%RBP) |
(4035) 0x4d3839 MOV %RDX,%RDI |
(4035) 0x4d383c AND $-0x10,%RDI |
(4035) 0x4d3840 LEA -0x1(%RDI),%R9 |
(4035) 0x4d3844 MOV %ESI,%EDX |
(4035) 0x4d3846 LEA (%R12,%RDX,8),%R10 |
(4035) 0x4d384a MOV %RDI,-0x98(%RBP) |
(4035) 0x4d3851 LEA (%RSI,%RDI,1),%RDX |
(4035) 0x4d3855 MOV %RDX,-0xb0(%RBP) |
(4035) 0x4d385c XOR %EDX,%EDX |
(4035) 0x4d385e MOV %RDX,-0x90(%RBP) |
(4035) 0x4d3865 MOV %R13,-0x70(%RBP) |
(4035) 0x4d3869 XOR %R11D,%R11D |
(4035) 0x4d386c JMP 4d38a6 |
0x4d386e XCHG %AX,%AX |
(4052) 0x4d3870 MOV -0x60(%RBP),%RDI |
(4052) 0x4d3874 MOV -0x90(%RBP),%R8 |
(4052) 0x4d387b MOV %RDX,(%RDI,%R8,8) |
(4052) 0x4d387f MOV -0x88(%RBP),%RDX |
(4052) 0x4d3886 VMOVQ %XMM0,(%RDX,%R8,8) |
(4052) 0x4d388c INC %R8 |
(4052) 0x4d388f MOV %R8,-0x90(%RBP) |
(4052) 0x4d3896 LEA 0x1(%R11),%RDX |
(4052) 0x4d389a CMP %RAX,%R11 |
(4052) 0x4d389d MOV %RDX,%R11 |
(4052) 0x4d38a0 JE 4d39d3 |
(4052) 0x4d38a6 MOV -0x30(%RBP),%RDX |
(4052) 0x4d38aa LEA (%RDX,%R11,1),%RDI |
(4052) 0x4d38ae TEST %R13,%R13 |
(4052) 0x4d38b1 JLE 4d3960 |
(4052) 0x4d38b7 MOV (%RBX,%RDI,8),%RDX |
(4052) 0x4d38bb CMP $0x78,%ECX |
(4052) 0x4d38be JA 4d38e2 |
(4052) 0x4d38c0 XOR %R8D,%R8D |
(4052) 0x4d38c3 NOPW %CS:(%RAX,%RAX,1) |
(4055) 0x4d38d0 CMP %RDX,(%R12,%R8,8) |
(4055) 0x4d38d4 JE 4d39bb |
(4055) 0x4d38da INC %R8 |
(4055) 0x4d38dd CMP %R8,%RSI |
(4055) 0x4d38e0 JNE 4d38d0 |
(4052) 0x4d38e2 CMP %R13,-0xa8(%RBP) |
(4052) 0x4d38e9 JAE 4d3960 |
(4052) 0x4d38eb CMPQ $0,-0x98(%RBP) |
(4052) 0x4d38f3 JE 4d3936 |
(4052) 0x4d38f5 VPBROADCASTQ %RDX,%YMM0 |
(4052) 0x4d38fb XOR %R8D,%R8D |
(4052) 0x4d38fe XCHG %AX,%AX |
(4054) 0x4d3900 VPCMPEQQ 0x20(%R10,%R8,8),%YMM0,%K0 |
(4054) 0x4d3908 VPCMPEQQ (%R10,%R8,8),%YMM0,%K1 |
(4054) 0x4d390f VPCMPEQQ 0x60(%R10,%R8,8),%YMM0,%K2 |
(4054) 0x4d3917 VPCMPEQQ 0x40(%R10,%R8,8),%YMM0,%K3 |
(4054) 0x4d391f KORB %K0,%K1,%K4 |
(4054) 0x4d3923 KORB %K2,%K3,%K5 |
(4054) 0x4d3927 KORTESTB %K5,%K4 |
(4054) 0x4d392b JNE 4d3995 |
(4054) 0x4d392d ADD $0x10,%R8 |
(4054) 0x4d3931 CMP %R9,%R8 |
(4054) 0x4d3934 JBE 4d3900 |
(4052) 0x4d3936 MOV -0x98(%RBP),%R8 |
(4052) 0x4d393d CMP -0xb8(%RBP),%R8 |
(4052) 0x4d3944 JE 4d3960 |
(4052) 0x4d3946 MOV -0xb0(%RBP),%R8 |
(4052) 0x4d394d NOPL (%RAX) |
(4053) 0x4d3950 CMP %RDX,(%R12,%R8,8) |
(4053) 0x4d3954 JE 4d39bb |
(4053) 0x4d3956 INC %R8 |
(4053) 0x4d3959 CMP %R8,%R13 |
(4053) 0x4d395c JNE 4d3950 |
(4052) 0x4d395e XCHG %AX,%AX |
(4052) 0x4d3960 MOV (%RBX,%RDI,8),%RDX |
(4052) 0x4d3964 MOV 0x10(%RBP),%R8 |
(4052) 0x4d3968 VMOVQ (%R8,%RDI,8),%XMM0 |
(4052) 0x4d396e MOV -0x70(%RBP),%RDI |
(4052) 0x4d3972 CMP -0x78(%RBP),%RDI |
(4052) 0x4d3976 JGE 4d3870 |
(4052) 0x4d397c MOV %RDX,(%R12,%RDI,8) |
(4052) 0x4d3980 MOV -0x68(%RBP),%RDX |
(4052) 0x4d3984 VMOVQ %XMM0,(%RDX,%RDI,8) |
(4052) 0x4d3989 INC %RDI |
(4052) 0x4d398c MOV %RDI,-0x70(%RBP) |
(4052) 0x4d3990 JMP 4d3896 |
(4052) 0x4d3995 KSHIFTLB $0x4,%K0,%K0 |
(4052) 0x4d399b KORB %K0,%K1,%K0 |
(4052) 0x4d399f KSHIFTLB $0x4,%K2,%K1 |
(4052) 0x4d39a5 KORB %K1,%K3,%K1 |
(4052) 0x4d39a9 KUNPCKBW %K0,%K1,%K0 |
(4052) 0x4d39ad KMOVD %K0,%EDX |
(4052) 0x4d39b1 TZCNT %EDX,%EDX |
(4052) 0x4d39b5 ADD %RSI,%R8 |
(4052) 0x4d39b8 ADD %RDX,%R8 |
(4052) 0x4d39bb MOV 0x10(%RBP),%RDX |
(4052) 0x4d39bf VMOVQ (%RDX,%RDI,8),%XMM0 |
(4052) 0x4d39c4 MOV -0x68(%RBP),%RDX |
(4052) 0x4d39c8 VMOVQ %XMM0,(%RDX,%R8,8) |
(4052) 0x4d39ce JMP 4d3896 |
(4035) 0x4d39d3 MOV -0x30(%RBP),%RCX |
(4035) 0x4d39d7 ADD %R14,%RCX |
(4035) 0x4d39da MOV -0x90(%RBP),%RSI |
(4035) 0x4d39e1 MOV -0x70(%RBP),%RAX |
(4035) 0x4d39e5 LEA (%RAX,%RSI,1),%R12 |
(4035) 0x4d39e9 MOV 0x48(%RBP),%RAX |
(4035) 0x4d39ed MOV -0x40(%RBP),%RDX |
(4035) 0x4d39f1 MOV %R12,(%RAX,%RDX,8) |
(4035) 0x4d39f5 TEST %RSI,%RSI |
(4035) 0x4d39f8 JE 4d3b13 |
(4035) 0x4d39fe MOV %RCX,-0x30(%RBP) |
(4035) 0x4d3a02 MOV 0x38(%RBP),%R13 |
(4035) 0x4d3a06 MOV (%R13,%RDX,8),%RDI |
(4035) 0x4d3a0b LEA (,%R12,8),%R14 |
(4035) 0x4d3a13 MOV %R14,%RSI |
(4035) 0x4d3a16 VZEROUPPER |
(4035) 0x4d3a19 CALL 4e69e0 <hypre_ReAlloc> |
(4035) 0x4d3a1e MOV -0x40(%RBP),%RCX |
(4035) 0x4d3a22 MOV %RAX,(%R13,%RCX,8) |
(4035) 0x4d3a27 MOV 0x40(%RBP),%R13 |
(4035) 0x4d3a2b MOV -0x40(%RBP),%RAX |
(4035) 0x4d3a2f MOV (%R13,%RAX,8),%RDI |
(4035) 0x4d3a34 MOV %R14,%RSI |
(4035) 0x4d3a37 CALL 4e69e0 <hypre_ReAlloc> |
(4035) 0x4d3a3c MOV -0x90(%RBP),%RDX |
(4035) 0x4d3a43 MOV -0x40(%RBP),%RSI |
(4035) 0x4d3a47 MOV %RAX,(%R13,%RSI,8) |
(4035) 0x4d3a4c MOV 0x50(%RBP),%RCX |
(4035) 0x4d3a50 MOV %R12,(%RCX,%RSI,8) |
(4035) 0x4d3a54 TEST %RDX,%RDX |
(4035) 0x4d3a57 MOV -0x80(%RBP),%R13 |
(4035) 0x4d3a5b JLE 4d3afd |
(4035) 0x4d3a61 MOV 0x38(%RBP),%RCX |
(4035) 0x4d3a65 MOV (%RCX,%RSI,8),%RCX |
(4035) 0x4d3a69 MOV -0x60(%RBP),%R9 |
(4035) 0x4d3a6d LEA -0x8(%R9,%RDX,8),%RSI |
(4035) 0x4d3a72 MOV -0x70(%RBP),%R11 |
(4035) 0x4d3a76 LEA (%RCX,%R11,8),%RDI |
(4035) 0x4d3a7a CMP %RDI,%RSI |
(4035) 0x4d3a7d SETAE %R10B |
(4035) 0x4d3a81 LEA -0x1(%RDX,%R11,1),%RSI |
(4035) 0x4d3a86 LEA (%RCX,%RSI,8),%RCX |
(4035) 0x4d3a8a CMP %R9,%RCX |
(4035) 0x4d3a8d SETAE %R8B |
(4035) 0x4d3a91 MOV -0x88(%RBP),%R12 |
(4035) 0x4d3a98 LEA -0x8(%R12,%RDX,8),%RCX |
(4035) 0x4d3a9d LEA (%RAX,%R11,8),%R14 |
(4035) 0x4d3aa1 CMP %R14,%RCX |
(4035) 0x4d3aa4 SETB %CL |
(4035) 0x4d3aa7 LEA (%RAX,%RSI,8),%RAX |
(4035) 0x4d3aab CMP %R12,%RAX |
(4035) 0x4d3aae SETB %AL |
(4035) 0x4d3ab1 TEST %R8B,%R10B |
(4035) 0x4d3ab4 JNE 4d3e95 |
(4035) 0x4d3aba OR %AL,%CL |
(4035) 0x4d3abc JE 4d3e95 |
(4035) 0x4d3ac2 CMP $0xd,%RDX |
(4035) 0x4d3ac6 JB 4d3f79 |
(4035) 0x4d3acc SAL $0x3,%RDX |
(4035) 0x4d3ad0 MOV %R12,-0x88(%RBP) |
(4035) 0x4d3ad7 MOV %R9,%RSI |
(4035) 0x4d3ada MOV %RDX,%R12 |
(4035) 0x4d3add CALL 4efac0 <_intel_fast_memcpy> |
(4035) 0x4d3ae2 MOV %R14,%RDI |
(4035) 0x4d3ae5 MOV -0x88(%RBP),%RSI |
(4035) 0x4d3aec MOV %R12,%RDX |
(4035) 0x4d3aef CALL 4efac0 <_intel_fast_memcpy> |
(4035) 0x4d3af4 MOV -0x60(%RBP),%R9 |
(4035) 0x4d3af8 JMP 4d3fb6 |
(4035) 0x4d3afd MOV -0x30(%RBP),%RCX |
(4035) 0x4d3b01 MOV -0x60(%RBP),%R9 |
(4035) 0x4d3b05 TEST %R9,%R9 |
(4035) 0x4d3b08 JNE 4d3fba |
(4035) 0x4d3b0e JMP 4d3fe0 |
(4035) 0x4d3b13 MOV -0x80(%RBP),%R13 |
(4035) 0x4d3b17 MOV -0x60(%RBP),%R9 |
(4035) 0x4d3b1b TEST %R9,%R9 |
(4035) 0x4d3b1e JNE 4d3fba |
(4035) 0x4d3b24 JMP 4d3fe0 |
(4035) 0x4d3b29 MOV %R8,%RCX |
(4035) 0x4d3b2c JMP 4d3f20 |
(4042) 0x4d3b31 KSHIFTLB $0x4,%K0,%K0 |
(4042) 0x4d3b37 KORB %K0,%K1,%K0 |
(4042) 0x4d3b3b KSHIFTLB $0x4,%K2,%K1 |
(4042) 0x4d3b41 KORB %K1,%K3,%K1 |
(4042) 0x4d3b45 KUNPCKBW %K0,%K1,%K0 |
(4042) 0x4d3b49 KMOVD %K0,%EAX |
(4042) 0x4d3b4d TZCNT %EAX,%EAX |
(4042) 0x4d3b51 ADD %RDI,%R12 |
(4042) 0x4d3b54 ADD %RAX,%R12 |
(4042) 0x4d3b57 MOV -0x30(%RBP),%RCX |
(4042) 0x4d3b5b MOV 0x68(%RBP),%RDI |
(4042) 0x4d3b5f MOV 0x10(%RBP),%RAX |
(4042) 0x4d3b63 VMOVQ (%RAX,%RCX,8),%XMM0 |
(4042) 0x4d3b68 MOV 0x70(%RBP),%RAX |
(4042) 0x4d3b6c VMOVQ %XMM0,(%RAX,%R12,8) |
(4042) 0x4d3b72 MOV -0x40(%RBP),%R10 |
(4042) 0x4d3b76 INC %RCX |
(4042) 0x4d3b79 INC %R8 |
(4042) 0x4d3b7c CMP %R14,%R8 |
(4042) 0x4d3b7f JE 4d3e7d |
(4042) 0x4d3b85 MOV (%RBX,%RCX,8),%R9 |
(4042) 0x4d3b89 CMP 0x28(%RBP),%R9 |
(4042) 0x4d3b8d JL 4d3ce0 |
(4042) 0x4d3b93 CMP 0x30(%RBP),%R9 |
(4042) 0x4d3b97 JG 4d3ce0 |
(4042) 0x4d3b9d MOV 0x60(%RBP),%RAX |
(4042) 0x4d3ba1 MOV (%RAX,%R10,8),%R10 |
(4042) 0x4d3ba5 MOV -0x90(%RBP),%R11 |
(4042) 0x4d3bac SUB %R10,%R11 |
(4042) 0x4d3baf JLE 4d3ca0 |
(4042) 0x4d3bb5 LEA (%RDI,%R10,8),%EAX |
(4042) 0x4d3bb9 AND $0x7f,%EAX |
(4042) 0x4d3bbc MOV $0x80,%EDX |
(4042) 0x4d3bc1 SUB %EAX,%EDX |
(4042) 0x4d3bc3 SHR $0x3,%EDX |
(4042) 0x4d3bc6 CMP %RDX,%R11 |
(4042) 0x4d3bc9 MOV %RDX,%RSI |
(4042) 0x4d3bcc CMOVB %R11,%RSI |
(4042) 0x4d3bd0 TEST %RSI,%RSI |
(4042) 0x4d3bd3 JE 4d3bf2 |
(4042) 0x4d3bd5 MOV %R10,%R12 |
(4042) 0x4d3bd8 MOV %RSI,%RAX |
(4042) 0x4d3bdb NOPL (%RAX,%RAX,1) |
(4048) 0x4d3be0 CMP %R9,(%RDI,%R12,8) |
(4048) 0x4d3be4 JE 4d3b5f |
(4048) 0x4d3bea INC %R12 |
(4048) 0x4d3bed DEC %RAX |
(4048) 0x4d3bf0 JNE 4d3be0 |
(4042) 0x4d3bf2 CMP %R11,%RDX |
(4042) 0x4d3bf5 JAE 4d3ca0 |
(4042) 0x4d3bfb MOV %RCX,-0x30(%RBP) |
(4042) 0x4d3bff SUB %RSI,%R11 |
(4042) 0x4d3c02 MOV %R11,%RDX |
(4042) 0x4d3c05 AND $-0x10,%RDX |
(4042) 0x4d3c09 JE 4d3c6a |
(4042) 0x4d3c0b LEA -0x1(%RDX),%RAX |
(4042) 0x4d3c0f VPBROADCASTQ %R9,%YMM0 |
(4042) 0x4d3c15 LEA (%R10,%RSI,1),%R12 |
(4042) 0x4d3c19 MOV 0x68(%RBP),%RCX |
(4042) 0x4d3c1d LEA (%RCX,%R12,8),%RCX |
(4042) 0x4d3c21 XOR %EDI,%EDI |
(4042) 0x4d3c23 NOPW %CS:(%RAX,%RAX,1) |
(4047) 0x4d3c30 VPCMPEQQ 0x20(%RCX,%RDI,8),%YMM0,%K0 |
(4047) 0x4d3c38 VPCMPEQQ (%RCX,%RDI,8),%YMM0,%K1 |
(4047) 0x4d3c3f VPCMPEQQ 0x60(%RCX,%RDI,8),%YMM0,%K2 |
(4047) 0x4d3c47 VPCMPEQQ 0x40(%RCX,%RDI,8),%YMM0,%K3 |
(4047) 0x4d3c4f KORB %K0,%K1,%K4 |
(4047) 0x4d3c53 KORB %K2,%K3,%K5 |
(4047) 0x4d3c57 KORTESTB %K5,%K4 |
(4047) 0x4d3c5b JNE 4d3b31 |
(4047) 0x4d3c61 ADD $0x10,%RDI |
(4047) 0x4d3c65 CMP %RAX,%RDI |
(4047) 0x4d3c68 JBE 4d3c30 |
(4042) 0x4d3c6a CMP %R11,%RDX |
(4042) 0x4d3c6d MOV -0x30(%RBP),%RCX |
(4042) 0x4d3c71 MOV 0x68(%RBP),%RDI |
(4042) 0x4d3c75 JE 4d3ca0 |
(4042) 0x4d3c77 ADD %RSI,%R10 |
(4042) 0x4d3c7a ADD %RDX,%R10 |
(4042) 0x4d3c7d MOV %R10,%R12 |
(4046) 0x4d3c80 CMP %R9,(%RDI,%R12,8) |
(4046) 0x4d3c84 JE 4d3b5f |
(4046) 0x4d3c8a INC %R12 |
(4046) 0x4d3c8d CMP %R12,-0x90(%RBP) |
(4046) 0x4d3c94 JNE 4d3c80 |
(4042) 0x4d3c96 NOPW %CS:(%RAX,%RAX,1) |
(4042) 0x4d3ca0 MOV -0x68(%RBP),%RDX |
(4042) 0x4d3ca4 CMP -0xa8(%RBP),%RDX |
(4042) 0x4d3cab JGE 4d3f41 |
(4042) 0x4d3cb1 MOV %R9,(%RDI,%RDX,8) |
(4042) 0x4d3cb5 MOV 0x10(%RBP),%RAX |
(4042) 0x4d3cb9 VMOVQ (%RAX,%RCX,8),%XMM0 |
(4042) 0x4d3cbe MOV 0x70(%RBP),%RAX |
(4042) 0x4d3cc2 VMOVQ %XMM0,(%RAX,%RDX,8) |
(4042) 0x4d3cc7 INC %RDX |
(4042) 0x4d3cca MOV %RDX,-0x68(%RBP) |
(4042) 0x4d3cce JMP 4d3b72 |
0x4d3cd3 NOPW %CS:(%RAX,%RAX,1) |
(4042) 0x4d3ce0 MOV 0x78(%RBP),%RAX |
(4042) 0x4d3ce4 MOV (%RAX,%R10,8),%R10 |
(4042) 0x4d3ce8 MOV -0x70(%RBP),%R11 |
(4042) 0x4d3cec SUB %R10,%R11 |
(4042) 0x4d3cef JLE 4d3e00 |
(4042) 0x4d3cf5 MOV 0x80(%RBP),%RAX |
(4042) 0x4d3cfc LEA (%RAX,%R10,8),%EAX |
(4042) 0x4d3d00 AND $0x7f,%EAX |
(4042) 0x4d3d03 MOV $0x80,%EDX |
(4042) 0x4d3d08 SUB %EAX,%EDX |
(4042) 0x4d3d0a SHR $0x3,%EDX |
(4042) 0x4d3d0d CMP %RDX,%R11 |
(4042) 0x4d3d10 MOV %RDX,%RSI |
(4042) 0x4d3d13 CMOVB %R11,%RSI |
(4042) 0x4d3d17 TEST %RSI,%RSI |
(4042) 0x4d3d1a JE 4d3d49 |
(4042) 0x4d3d1c MOV %R10,%R12 |
(4042) 0x4d3d1f MOV %RSI,%RAX |
(4042) 0x4d3d22 NOPW %CS:(%RAX,%RAX,1) |
(4045) 0x4d3d30 MOV 0x80(%RBP),%RDI |
(4045) 0x4d3d37 CMP %R9,(%RDI,%R12,8) |
(4045) 0x4d3d3b JE 4d3e64 |
(4045) 0x4d3d41 INC %R12 |
(4045) 0x4d3d44 DEC %RAX |
(4045) 0x4d3d47 JNE 4d3d30 |
(4042) 0x4d3d49 CMP %R11,%RDX |
(4042) 0x4d3d4c MOV 0x68(%RBP),%RDI |
(4042) 0x4d3d50 JAE 4d3e00 |
(4042) 0x4d3d56 MOV %RCX,-0x30(%RBP) |
(4042) 0x4d3d5a SUB %RSI,%R11 |
(4042) 0x4d3d5d MOV %R11,%RDX |
(4042) 0x4d3d60 AND $-0x10,%RDX |
(4042) 0x4d3d64 JE 4d3dc6 |
(4042) 0x4d3d66 LEA -0x1(%RDX),%RAX |
(4042) 0x4d3d6a VPBROADCASTQ %R9,%YMM0 |
(4042) 0x4d3d70 LEA (%R10,%RSI,1),%R12 |
(4042) 0x4d3d74 MOV 0x80(%RBP),%RCX |
(4042) 0x4d3d7b LEA (%RCX,%R12,8),%RCX |
(4042) 0x4d3d7f XOR %EDI,%EDI |
(4042) 0x4d3d81 NOPW %CS:(%RAX,%RAX,1) |
(4044) 0x4d3d90 VPCMPEQQ 0x20(%RCX,%RDI,8),%YMM0,%K0 |
(4044) 0x4d3d98 VPCMPEQQ (%RCX,%RDI,8),%YMM0,%K1 |
(4044) 0x4d3d9f VPCMPEQQ 0x60(%RCX,%RDI,8),%YMM0,%K2 |
(4044) 0x4d3da7 VPCMPEQQ 0x40(%RCX,%RDI,8),%YMM0,%K3 |
(4044) 0x4d3daf KORB %K0,%K1,%K4 |
(4044) 0x4d3db3 KORB %K2,%K3,%K5 |
(4044) 0x4d3db7 KORTESTB %K5,%K4 |
(4044) 0x4d3dbb JNE 4d3e3a |
(4044) 0x4d3dbd ADD $0x10,%RDI |
(4044) 0x4d3dc1 CMP %RAX,%RDI |
(4044) 0x4d3dc4 JBE 4d3d90 |
(4042) 0x4d3dc6 CMP %R11,%RDX |
(4042) 0x4d3dc9 MOV -0x30(%RBP),%RCX |
(4042) 0x4d3dcd MOV 0x68(%RBP),%RDI |
(4042) 0x4d3dd1 JE 4d3e00 |
(4042) 0x4d3dd3 ADD %RSI,%R10 |
(4042) 0x4d3dd6 ADD %RDX,%R10 |
(4042) 0x4d3dd9 MOV %R10,%R12 |
(4042) 0x4d3ddc NOPL (%RAX) |
(4043) 0x4d3de0 MOV 0x80(%RBP),%RAX |
(4043) 0x4d3de7 CMP %R9,(%RAX,%R12,8) |
(4043) 0x4d3deb JE 4d3e68 |
(4043) 0x4d3ded INC %R12 |
(4043) 0x4d3df0 CMP %R12,-0x70(%RBP) |
(4043) 0x4d3df4 JNE 4d3de0 |
(4042) 0x4d3df6 NOPW %CS:(%RAX,%RAX,1) |
(4042) 0x4d3e00 MOV -0x78(%RBP),%RDX |
(4042) 0x4d3e04 CMP -0x60(%RBP),%RDX |
(4042) 0x4d3e08 JGE 4d3ec1 |
(4042) 0x4d3e0e MOV 0x80(%RBP),%RAX |
(4042) 0x4d3e15 MOV %R9,(%RAX,%RDX,8) |
(4042) 0x4d3e19 MOV 0x10(%RBP),%RAX |
(4042) 0x4d3e1d VMOVQ (%RAX,%RCX,8),%XMM0 |
(4042) 0x4d3e22 MOV 0x88(%RBP),%RAX |
(4042) 0x4d3e29 VMOVQ %XMM0,(%RAX,%RDX,8) |
(4042) 0x4d3e2e INC %RDX |
(4042) 0x4d3e31 MOV %RDX,-0x78(%RBP) |
(4042) 0x4d3e35 JMP 4d3b72 |
(4042) 0x4d3e3a KSHIFTLB $0x4,%K0,%K0 |
(4042) 0x4d3e40 KORB %K0,%K1,%K0 |
(4042) 0x4d3e44 KSHIFTLB $0x4,%K2,%K1 |
(4042) 0x4d3e4a KORB %K1,%K3,%K1 |
(4042) 0x4d3e4e KUNPCKBW %K0,%K1,%K0 |
(4042) 0x4d3e52 KMOVD %K0,%EAX |
(4042) 0x4d3e56 TZCNT %EAX,%EAX |
(4042) 0x4d3e5a ADD %RDI,%R12 |
(4042) 0x4d3e5d ADD %RAX,%R12 |
(4042) 0x4d3e60 MOV -0x30(%RBP),%RCX |
(4042) 0x4d3e64 MOV 0x68(%RBP),%RDI |
(4042) 0x4d3e68 MOV 0x10(%RBP),%RAX |
(4042) 0x4d3e6c VMOVQ (%RAX,%RCX,8),%XMM0 |
(4042) 0x4d3e71 MOV 0x88(%RBP),%RAX |
(4042) 0x4d3e78 JMP 4d3b6c |
(4035) 0x4d3e7d MOV -0x98(%RBP),%RCX |
(4035) 0x4d3e84 MOV -0x50(%RBP),%RDI |
(4035) 0x4d3e88 MOV -0x48(%RBP),%RSI |
(4035) 0x4d3e8c MOV -0x58(%RBP),%RDX |
(4035) 0x4d3e90 JMP 4d3f18 |
(4035) 0x4d3e95 XOR %EAX,%EAX |
(4035) 0x4d3e97 NOPW (%RAX,%RAX,1) |
(4049) 0x4d3ea0 MOV (%R9,%RAX,8),%RCX |
(4049) 0x4d3ea4 MOV %RCX,(%RDI,%RAX,8) |
(4049) 0x4d3ea8 VMOVQ (%R12,%RAX,8),%XMM0 |
(4049) 0x4d3eae VMOVQ %XMM0,(%R14,%RAX,8) |
(4049) 0x4d3eb4 INC %RAX |
(4049) 0x4d3eb7 CMP %RAX,%RDX |
(4049) 0x4d3eba JNE 4d3ea0 |
(4035) 0x4d3ebc JMP 4d3fb6 |
(4035) 0x4d3ec1 MOV %RCX,%R14 |
(4035) 0x4d3ec4 MOV $0x510f23,%EDI |
(4035) 0x4d3ec9 MOV $0xd4e,%ESI |
(4035) 0x4d3ece MOV $0x1,%EDX |
(4035) 0x4d3ed3 XOR %ECX,%ECX |
(4035) 0x4d3ed5 VZEROUPPER |
(4035) 0x4d3ed8 CALL 4e9300 <hypre_error_handler> |
(4035) 0x4d3edd MOV 0xd0(%RBP),%RAX |
(4035) 0x4d3ee4 LOCK INCQ (%RAX) |
(4035) 0x4d3ee8 MOV $0x51107f,%EDI |
(4035) 0x4d3eed CMPQ $0,0xc0(%RBP) |
(4035) 0x4d3ef5 JE 4d3f01 |
(4035) 0x4d3ef7 MOV %R13,%RSI |
(4035) 0x4d3efa XOR %EAX,%EAX |
(4035) 0x4d3efc CALL 4e6b40 <hypre_printf> |
(4035) 0x4d3f01 MOV -0x50(%RBP),%RDI |
(4035) 0x4d3f05 MOV -0x48(%RBP),%RSI |
(4035) 0x4d3f09 MOV -0x58(%RBP),%RDX |
(4035) 0x4d3f0d MOV %R14,%RCX |
(4035) 0x4d3f10 VPCMPEQD %YMM3,%YMM3,%YMM3 |
(4035) 0x4d3f14 MOV -0x40(%RBP),%R10 |
(4035) 0x4d3f18 MOV -0x78(%RBP),%R9 |
(4035) 0x4d3f1c MOV -0x68(%RBP),%R11 |
(4035) 0x4d3f20 MOV 0x18(%RBP),%R8 |
(4035) 0x4d3f24 MOV 0x38(%R8),%RAX |
(4035) 0x4d3f28 MOV %R11,(%RAX,%R10,8) |
(4035) 0x4d3f2c MOV 0x40(%R8),%RAX |
(4035) 0x4d3f30 MOV %R9,(%RAX,%R10,8) |
(4035) 0x4d3f34 MOV -0x80(%RBP),%R13 |
(4035) 0x4d3f38 MOV 0x20(%RBP),%RAX |
(4035) 0x4d3f3c JMP 4d3514 |
(4035) 0x4d3f41 MOV %RCX,%R14 |
(4035) 0x4d3f44 MOV $0x510f23,%EDI |
(4035) 0x4d3f49 MOV $0xd70,%ESI |
(4035) 0x4d3f4e MOV $0x1,%EDX |
(4035) 0x4d3f53 XOR %ECX,%ECX |
(4035) 0x4d3f55 VZEROUPPER |
(4035) 0x4d3f58 CALL 4e9300 <hypre_error_handler> |
(4035) 0x4d3f5d MOV 0xd0(%RBP),%RAX |
(4035) 0x4d3f64 LOCK INCQ (%RAX) |
(4035) 0x4d3f68 MOV $0x5110a5,%EDI |
(4035) 0x4d3f6d CMPQ $0,0xc0(%RBP) |
(4035) 0x4d3f75 JNE 4d3ef7 |
(4035) 0x4d3f77 JMP 4d3f01 |
(4035) 0x4d3f79 MOV %RDX,%RAX |
(4035) 0x4d3f7c AND $-0x4,%RAX |
(4035) 0x4d3f80 JE 4d3fb2 |
(4035) 0x4d3f82 LEA -0x1(%RAX),%RCX |
(4035) 0x4d3f86 XOR %ESI,%ESI |
(4035) 0x4d3f88 NOPL (%RAX,%RAX,1) |
(4051) 0x4d3f90 VMOVUPS (%R9,%RSI,8),%YMM0 |
(4051) 0x4d3f96 VMOVUPS %YMM0,(%RDI,%RSI,8) |
(4051) 0x4d3f9b VMOVDQU (%R12,%RSI,8),%YMM0 |
(4051) 0x4d3fa1 VMOVDQU %YMM0,(%R14,%RSI,8) |
(4051) 0x4d3fa7 ADD $0x4,%RSI |
(4051) 0x4d3fab CMP %RCX,%RSI |
(4051) 0x4d3fae JLE 4d3f90 |
(4035) 0x4d3fb0 JMP 4d4017 |
(4035) 0x4d3fb2 XOR %EAX,%EAX |
(4035) 0x4d3fb4 JMP 4d4000 |
(4035) 0x4d3fb6 MOV -0x30(%RBP),%RCX |
(4035) 0x4d3fba MOV %R9,%RDI |
(4035) 0x4d3fbd MOV %RCX,%R14 |
(4035) 0x4d3fc0 VZEROUPPER |
(4035) 0x4d3fc3 CALL 4e6a60 <hypre_Free> |
(4035) 0x4d3fc8 MOV -0x88(%RBP),%RDI |
(4035) 0x4d3fcf CALL 4e6a60 <hypre_Free> |
(4035) 0x4d3fd4 MOV %R14,%RCX |
(4035) 0x4d3fd7 XOR %EAX,%EAX |
(4035) 0x4d3fd9 MOV %RAX,-0x88(%RBP) |
(4035) 0x4d3fe0 MOV -0x50(%RBP),%RDI |
(4035) 0x4d3fe4 MOV -0x48(%RBP),%RSI |
(4035) 0x4d3fe8 MOV -0x58(%RBP),%RDX |
(4035) 0x4d3fec VPCMPEQD %YMM3,%YMM3,%YMM3 |
(4035) 0x4d3ff0 MOV 0x20(%RBP),%RAX |
(4035) 0x4d3ff4 JMP 4d3514 |
0x4d3ff9 NOPL (%RAX) |
(4050) 0x4d4000 MOV (%R9,%RAX,8),%RCX |
(4050) 0x4d4004 MOV %RCX,(%RDI,%RAX,8) |
(4050) 0x4d4008 VMOVQ (%R12,%RAX,8),%XMM0 |
(4050) 0x4d400e VMOVQ %XMM0,(%R14,%RAX,8) |
(4050) 0x4d4014 INC %RAX |
(4050) 0x4d4017 CMP %RAX,%RDX |
(4050) 0x4d401a JNE 4d4000 |
(4035) 0x4d401c JMP 4d3fb6 |
0x4d401e ADD $0x98,%RSP |
0x4d4025 POP %RBX |
0x4d4026 POP %R12 |
0x4d4028 POP %R13 |
0x4d402a POP %R14 |
0x4d402c POP %R15 |
0x4d402e POP %RBP |
0x4d402f VZEROUPPER |
0x4d4032 RET |
0x4d4033 NOPW %CS:(%RAX,%RAX,1) |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►100.00+ | __kmp_invoke_microtask | libiomp5.so | |
○ | __kmp_invoke_task_func | libiomp5.so |
Path / |
Source file and lines | IJMatrix_parcsr.c:3240-3484 |
Module | exec |
nb instructions | 164 |
nb uops | 177 |
loop length | 668 |
used x86 registers | 14 |
used mmx registers | 0 |
used xmm registers | 2 |
used ymm registers | 2 |
used zmm registers | 0 |
nb stack references | 12 |
micro-operation queue | 29.50 cycles |
front end | 29.50 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 13.30 | 13.20 | 8.33 | 8.33 | 10.00 | 13.20 | 13.10 | 10.00 | 10.00 | 10.00 | 13.20 | 8.33 |
cycles | 13.30 | 16.60 | 8.33 | 8.33 | 10.00 | 13.20 | 13.10 | 10.00 | 10.00 | 10.00 | 13.20 | 8.33 |
Cycles executing div or sqrt instructions | 16.00 |
FE+BE cycles | 28.16-28.21 |
Stall cycles | 0.00 |
Front-end | 29.50 |
Dispatch | 16.60 |
DIV/SQRT | 16.00 |
Overall L1 | 29.50 |
all | 12% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 33% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 16% |
all | 13% |
load | 10% |
store | 11% |
mul | 12% |
add-sub | 16% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 9% |
other | 14% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
SUB $0x98,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R9,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R8,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RCX,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDX,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDI,-0xa0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 4e86b0 <hypre_NumActiveThreads> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %RAX,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 4e86c0 <hypre_GetThreadNum> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %RAX,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R13,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
OR %R15,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SHR $0x20,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
JE 4d32f1 <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x81> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R15,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CQTO | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
IDIV %R13 | 5 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 10 |
MOV 0xb8(%RBP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x1(%R14),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %RDX,%R14 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 4d3309 <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x99> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA 0x1(%RAX),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R13,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
IMUL %R14,%RSI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
IMUL %RCX,%R13 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOVQ $0,(%R15,%R14,8) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RSI,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %R13,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JL 4d3331 <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0xc1> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
JMP 4d33c0 <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x150> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
MOV %R15D,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
DIV %R13D | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
MOV 0xb8(%RBP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x1(%R14),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %RDX,%R14 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JL 4d32cd <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x5d> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RAX,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
IMUL %R14,%RSI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %RDX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
IMUL %RAX,%RCX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %RDX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RCX,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOVQ $0,(%R15,%R14,8) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RSI,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %R13,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 4d33c0 <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x150> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP %RDX,%R14 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMOVL %R14,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
IMUL %R14,%RAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %R13,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %RDX,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
SUB %RAX,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RDI,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x4,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV -0x48(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JE 4d33a2 <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x132> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA -0x1(%RCX),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA (%R8,%R12,8),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPXOR %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %R9D,%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VEXTRACTI128 $0x1,%YMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPADDQ %XMM1,%XMM0,%XMM0 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VPSHUFD $-0x12,%XMM0,%XMM1 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPADDQ %XMM1,%XMM0,%XMM0 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VMOVQ %XMM0,%RSI | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
CMP %RCX,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x48(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JNE 4d33a6 <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x136> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
JMP 4d33bc <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x14c> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %ESI,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD %RDX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %RAX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RSI,(%R15,%R14,8) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV -0xa0(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV $0x736530,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 410030 <__kmpc_barrier@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
TEST %R14,%R14 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JNE 4d3484 <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x214> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0xc8(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
TEST %RAX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 4d3484 <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x214> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP $0x8,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB 4d3457 <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x1e7> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RAX,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SHR $0x3,%RCX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV (%R15),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x40(%R15),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RAX,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x8,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
CMP %RAX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 4d3484 <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x214> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV (%R15,%RCX,8),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0xa0(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV $0x736550,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CALL 410030 <__kmpc_barrier@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
TEST %R14,%R14 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JE 4d34a3 <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x233> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x8(%R15,%R14,8),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JMP 4d34a5 <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x235> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x50(%RBP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x48(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R12,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %R13,%R12 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 4d401e <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0xdae> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0xb0(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x98(%RBP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
TEST %R8,%R8 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
MOV %RCX,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SETE %CL | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
LEA (%RAX,%R14,8),%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
TEST %R9,%R9 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
SETLE %AL | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
OR %CL,%AL | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %AL,-0x31(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R8,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
DEC %R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SHR $0x1,%R9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV %R9,-0xa0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VPCMPEQD %YMM3,%YMM3,%YMM3 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
MOV 0x20(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
XOR %R8D,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R8,-0x88(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R13,-0x80(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP 4d3520 <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x2b0> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD $0x98,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | IJMatrix_parcsr.c:3240-3484 |
Module | exec |
nb instructions | 164 |
nb uops | 177 |
loop length | 668 |
used x86 registers | 14 |
used mmx registers | 0 |
used xmm registers | 2 |
used ymm registers | 2 |
used zmm registers | 0 |
nb stack references | 12 |
micro-operation queue | 29.50 cycles |
front end | 29.50 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 13.30 | 13.20 | 8.33 | 8.33 | 10.00 | 13.20 | 13.10 | 10.00 | 10.00 | 10.00 | 13.20 | 8.33 |
cycles | 13.30 | 16.60 | 8.33 | 8.33 | 10.00 | 13.20 | 13.10 | 10.00 | 10.00 | 10.00 | 13.20 | 8.33 |
Cycles executing div or sqrt instructions | 16.00 |
FE+BE cycles | 28.16-28.21 |
Stall cycles | 0.00 |
Front-end | 29.50 |
Dispatch | 16.60 |
DIV/SQRT | 16.00 |
Overall L1 | 29.50 |
all | 12% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 33% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 16% |
all | 13% |
load | 10% |
store | 11% |
mul | 12% |
add-sub | 16% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 9% |
other | 14% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
SUB $0x98,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R9,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R8,-0x50(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RCX,-0x48(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDX,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDI,-0xa0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CALL 4e86b0 <hypre_NumActiveThreads> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %RAX,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 4e86c0 <hypre_GetThreadNum> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %RAX,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R13,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
OR %R15,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SHR $0x20,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
JE 4d32f1 <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x81> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R15,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CQTO | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
IDIV %R13 | 5 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 10 |
MOV 0xb8(%RBP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x1(%R14),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %RDX,%R14 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 4d3309 <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x99> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA 0x1(%RAX),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R13,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
IMUL %R14,%RSI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
IMUL %RCX,%R13 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOVQ $0,(%R15,%R14,8) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RSI,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %R13,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JL 4d3331 <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0xc1> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
JMP 4d33c0 <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x150> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
MOV %R15D,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
DIV %R13D | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
MOV 0xb8(%RBP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x1(%R14),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %RDX,%R14 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JL 4d32cd <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x5d> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RAX,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
IMUL %R14,%RSI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %RDX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
IMUL %RAX,%RCX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %RDX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RCX,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOVQ $0,(%R15,%R14,8) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RSI,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %R13,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 4d33c0 <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x150> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP %RDX,%R14 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMOVL %R14,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
IMUL %R14,%RAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
MOV %R13,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SUB %RDX,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
SUB %RAX,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RDI,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x4,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV -0x48(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JE 4d33a2 <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x132> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA -0x1(%RCX),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA (%R8,%R12,8),%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPXOR %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %R9D,%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VEXTRACTI128 $0x1,%YMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPADDQ %XMM1,%XMM0,%XMM0 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VPSHUFD $-0x12,%XMM0,%XMM1 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPADDQ %XMM1,%XMM0,%XMM0 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VMOVQ %XMM0,%RSI | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
CMP %RCX,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x48(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JNE 4d33a6 <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x136> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
JMP 4d33bc <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x14c> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %ESI,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD %RDX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %RAX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RSI,(%R15,%R14,8) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV -0xa0(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV $0x736530,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 410030 <__kmpc_barrier@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
TEST %R14,%R14 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JNE 4d3484 <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x214> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0xc8(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
TEST %RAX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 4d3484 <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x214> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP $0x8,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB 4d3457 <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x1e7> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RAX,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SHR $0x3,%RCX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV (%R15),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x40(%R15),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RAX,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x8,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
CMP %RAX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 4d3484 <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x214> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV (%R15,%RCX,8),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0xa0(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%RAX),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV $0x736550,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CALL 410030 <__kmpc_barrier@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
TEST %R14,%R14 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JE 4d34a3 <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x233> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x8(%R15,%R14,8),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JMP 4d34a5 <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x235> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x50(%RBP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x48(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R12,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %R13,%R12 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 4d401e <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0xdae> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0xb0(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x98(%RBP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
TEST %R8,%R8 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
MOV %RCX,%R8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SETE %CL | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
LEA (%RAX,%R14,8),%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
TEST %R9,%R9 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
SETLE %AL | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
OR %CL,%AL | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
MOV %AL,-0x31(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R8,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
DEC %R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SHR $0x1,%R9 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV %R9,-0xa0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VPCMPEQD %YMM3,%YMM3,%YMM3 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
MOV 0x20(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
XOR %R8D,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R8,-0x88(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %R13,-0x80(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP 4d3520 <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x2b0> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD $0x98,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼hypre_IJMatrixSetValuesOMPParCSR.extracted.28– | 0.38 | 0.08 |
▼Loop 4035 - IJMatrix_parcsr.c:3262-3484 - exec– | 0.03 | 0.01 |
▼Loop 4042 - IJMatrix_parcsr.c:3262-3454 - exec– | 0.35 | 0.07 |
○Loop 4043 - IJMatrix_parcsr.c:3388-3390 - exec | 0 | 0 |
○Loop 4047 - IJMatrix_parcsr.c:3422-3424 - exec | 0 | 0 |
○Loop 4044 - IJMatrix_parcsr.c:3388-3424 - exec | 0 | 0 |
○Loop 4048 - IJMatrix_parcsr.c:3422-3424 - exec | 0 | 0 |
○Loop 4046 - IJMatrix_parcsr.c:3422-3424 - exec | 0 | 0 |
○Loop 4045 - IJMatrix_parcsr.c:3388-3390 - exec | 0 | 0 |
▼Loop 4036 - IJMatrix_parcsr.c:3262-3484 - exec– | 0 | 0 |
▼Loop 4037 - IJMatrix_parcsr.c:3262-3484 - exec– | 0 | 0 |
○Loop 4039 - IJMatrix_parcsr.c:3262-3482 - exec | 0 | 0 |
○Loop 4038 - IJMatrix_parcsr.c:3262-3482 - exec | 0 | 0 |
▼Loop 4040 - IJMatrix_parcsr.c:3475-3484 - exec– | 0 | 0 |
○Loop 4041 - IJMatrix_parcsr.c:3478-3484 - exec | 0 | 0 |
○Loop 4049 - IJMatrix_parcsr.c:3359-3362 - exec | 0 | 0 |
○Loop 4050 - IJMatrix_parcsr.c:3359-3362 - exec | 0 | 0 |
▼Loop 4052 - IJMatrix_parcsr.c:3262-3337 - exec– | 0 | 0 |
○Loop 4053 - IJMatrix_parcsr.c:3318-3320 - exec | 0 | 0 |
○Loop 4054 - IJMatrix_parcsr.c:3318-3320 - exec | 0 | 0 |
○Loop 4055 - IJMatrix_parcsr.c:3318-3320 - exec | 0 | 0 |
○Loop 4051 - IJMatrix_parcsr.c:3359-3362 - exec | 0 | 0 |
○Loop 4058 - IJMatrix_parcsr.c:3274-3275 - exec | 0.01 | 0 |
○Loop 4056 - IJMatrix_parcsr.c:3282-3283 - exec | 0 | 0 |
○Loop 4034 - IJMatrix_parcsr.c:3274-3275 - exec | 0 | 0 |
○Loop 4057 - IJMatrix_parcsr.c:3282-3283 - exec | 0 | 0 |