Function: hypre_IJMatrixSetValuesOMPParCSR.extracted.28 | Module: libIJ_mv.so | Source: IJMatrix_parcsr.c:3240-3484 [...] | Coverage: 0.39% |
---|
Function: hypre_IJMatrixSetValuesOMPParCSR.extracted.28 | Module: libIJ_mv.so | Source: IJMatrix_parcsr.c:3240-3484 [...] | Coverage: 0.39% |
---|
/scratch_na/users/xoserete/qaas_runs/171-415-3872/intel/AMG/build/AMG/AMG/IJ_mv/IJMatrix_parcsr.c: 3240 - 3484 |
-------------------------------------------------------------------------------- |
3240: #pragma omp parallel |
[...] |
3256: num_threads = hypre_NumActiveThreads(); |
3257: my_thread_num = hypre_GetThreadNum(); |
3258: |
3259: len = nrows/num_threads; |
3260: rest = nrows - len*num_threads; |
3261: |
3262: if (my_thread_num < rest) |
3263: { |
3264: ns = my_thread_num*(len+1); |
3265: ne = (my_thread_num+1)*(len+1); |
3266: } |
3267: else |
3268: { |
3269: ns = my_thread_num*len+rest; |
3270: ne = (my_thread_num+1)*len+rest; |
3271: } |
3272: |
3273: value_start[my_thread_num] = 0; |
3274: for (ii=ns; ii < ne; ii++) |
3275: value_start[my_thread_num] += ncols[ii]; |
3276: |
3277: #ifdef HYPRE_USING_OPENMP |
3278: #pragma omp barrier |
3279: #endif |
3280: if (my_thread_num == 0) |
3281: { |
3282: for (i=0; i < max_num_threads; i++) |
3283: value_start[i+1] += value_start[i]; |
[...] |
3289: if (my_thread_num) |
3290: indx = value_start[my_thread_num-1]; |
3291: for (ii=ns; ii < ne; ii++) |
3292: { |
3293: row = rows[ii]; |
3294: n = ncols[ii]; |
3295: /* processor owns the row */ |
3296: if (row >= row_partitioning[pstart] && row < row_partitioning[pstart+1]) |
3297: { |
3298: row_local = row - row_partitioning[pstart]; |
3299: /* compute local row number */ |
3300: if (need_aux) |
3301: { |
3302: local_j = aux_j[row_local]; |
3303: local_data = aux_data[row_local]; |
3304: space = row_space[row_local]; |
3305: old_size = row_length[row_local]; |
3306: size = space - old_size; |
3307: if (size < n) |
3308: { |
3309: size = n - size; |
3310: tmp_j = hypre_CTAlloc(HYPRE_Int,size); |
3311: tmp_data = hypre_CTAlloc(HYPRE_Complex,size); |
3312: } |
3313: tmp_indx = 0; |
3314: not_found = 1; |
3315: size = old_size; |
3316: for (i=0; i < n; i++) |
3317: { |
3318: for (j=0; j < old_size; j++) |
3319: { |
3320: if (local_j[j] == cols[indx]) |
3321: { |
3322: local_data[j] = values[indx]; |
[...] |
3329: if (size < space) |
3330: { |
3331: local_j[size] = cols[indx]; |
3332: local_data[size++] = values[indx]; |
3333: } |
3334: else |
3335: { |
3336: tmp_j[tmp_indx] = cols[indx]; |
3337: tmp_data[tmp_indx++] = values[indx]; |
[...] |
3344: row_length[row_local] = size+tmp_indx; |
3345: |
3346: if (tmp_indx) |
3347: { |
3348: aux_j[row_local] = hypre_TReAlloc(aux_j[row_local],HYPRE_Int, |
3349: size+tmp_indx); |
3350: aux_data[row_local] = hypre_TReAlloc(aux_data[row_local], |
3351: HYPRE_Complex,size+tmp_indx); |
3352: row_space[row_local] = size+tmp_indx; |
3353: local_j = aux_j[row_local]; |
[...] |
3359: for (i=0; i < tmp_indx; i++) |
3360: { |
3361: local_j[cnt] = tmp_j[i]; |
3362: local_data[cnt++] = tmp_data[i]; |
3363: } |
3364: |
3365: if (tmp_j) |
3366: { |
3367: hypre_TFree(tmp_j); |
3368: hypre_TFree(tmp_data); |
[...] |
3376: offd_indx = hypre_AuxParCSRMatrixIndxOffd(aux_matrix)[row_local]; |
3377: diag_indx = hypre_AuxParCSRMatrixIndxDiag(aux_matrix)[row_local]; |
3378: cnt_diag = diag_indx; |
3379: cnt_offd = offd_indx; |
3380: diag_space = diag_i[row_local+1]; |
3381: offd_space = offd_i[row_local+1]; |
3382: not_found = 1; |
3383: for (i=0; i < n; i++) |
3384: { |
3385: if (cols[indx] < col_0 || cols[indx] > col_n) |
3386: /* insert into offd */ |
3387: { |
3388: for (j=offd_i[row_local]; j < offd_indx; j++) |
3389: { |
3390: if (offd_j[j] == cols[indx]) |
3391: { |
3392: offd_data[j] = values[indx]; |
[...] |
3399: if (cnt_offd < offd_space) |
3400: { |
3401: offd_j[cnt_offd] = cols[indx]; |
3402: offd_data[cnt_offd++] = values[indx]; |
3403: } |
3404: else |
3405: { |
3406: hypre_error(HYPRE_ERROR_GENERIC); |
3407: #ifdef HYPRE_USING_OPENMP |
3408: #pragma omp atomic |
3409: #endif |
3410: error_flag++; |
3411: if (print_level) |
3412: hypre_printf("Error in row %d ! Too many elements!\n", |
[...] |
3422: for (j=diag_i[row_local]; j < diag_indx; j++) |
3423: { |
3424: if (diag_j[j] == cols[indx]) |
3425: { |
3426: diag_data[j] = values[indx]; |
[...] |
3433: if (cnt_diag < diag_space) |
3434: { |
3435: diag_j[cnt_diag] = cols[indx]; |
3436: diag_data[cnt_diag++] = values[indx]; |
3437: } |
3438: else |
3439: { |
3440: hypre_error(HYPRE_ERROR_GENERIC); |
3441: #ifdef HYPRE_USING_OPENMP |
3442: #pragma omp atomic |
3443: #endif |
3444: error_flag++; |
3445: if (print_level) |
3446: hypre_printf("Error in row %d ! Too many elements !\n", |
[...] |
3454: indx++; |
3455: } |
3456: |
3457: hypre_AuxParCSRMatrixIndxDiag(aux_matrix)[row_local] = cnt_diag; |
3458: hypre_AuxParCSRMatrixIndxOffd(aux_matrix)[row_local] = cnt_offd; |
[...] |
3466: indx += n; |
3467: if (aux_matrix) |
3468: { |
3469: col_indx = 0; |
3470: for (i=0; i < off_proc_i_indx; i=i+2) |
3471: { |
3472: row_len = off_proc_i[i+1]; |
3473: if (off_proc_i[i] == row) |
3474: { |
3475: for (j=0; j < n; j++) |
3476: { |
3477: cnt1 = col_indx; |
3478: for (k=0; k < row_len; k++) |
3479: { |
3480: if (off_proc_j[cnt1] == cols[j]) |
3481: { |
3482: off_proc_j[cnt1++] = -1; |
3483: /*cancel_indx++;*/ |
3484: offproc_cnt[my_thread_num]++; |
0xdd80 PUSH %RBP |
0xdd81 MOV %RSP,%RBP |
0xdd84 PUSH %R15 |
0xdd86 PUSH %R14 |
0xdd88 PUSH %R13 |
0xdd8a PUSH %R12 |
0xdd8c PUSH %RBX |
0xdd8d SUB $0x88,%RSP |
0xdd94 MOV %R9,%RBX |
0xdd97 MOV %R8,-0xa0(%RBP) |
0xdd9e MOV %RCX,-0x38(%RBP) |
0xdda2 MOV %RDX,%R15 |
0xdda5 MOV %RDI,%R14 |
0xdda8 CALL 2d90 <hypre_NumActiveThreads@plt> |
0xddad MOV %RAX,%R13 |
0xddb0 CALL 2b50 <hypre_GetThreadNum@plt> |
0xddb5 MOV %RAX,%R12 |
0xddb8 MOV %R13,%RAX |
0xddbb OR %R15,%RAX |
0xddbe SHR $0x20,%RAX |
0xddc2 JE ddce |
0xddc4 MOV %R15,%RAX |
0xddc7 CQTO |
0xddc9 IDIV %R13 |
0xddcc JMP ddd6 |
0xddce MOV %R15D,%EAX |
0xddd1 XOR %EDX,%EDX |
0xddd3 DIV %R13D |
0xddd6 MOV 0xb8(%RBP),%R15 |
0xdddd LEA 0x1(%R12),%RCX |
0xdde2 CMP %RDX,%R12 |
0xdde5 MOV -0x38(%RBP),%R8 |
0xdde9 JGE ddfc |
0xddeb LEA 0x1(%RAX),%RDI |
0xddef MOV %RDI,%RSI |
0xddf2 IMUL %R12,%RSI |
0xddf6 IMUL %RCX,%RDI |
0xddfa JMP de10 |
0xddfc MOV %RAX,%RSI |
0xddff IMUL %R12,%RSI |
0xde03 ADD %RDX,%RSI |
0xde06 IMUL %RAX,%RCX |
0xde0a ADD %RDX,%RCX |
0xde0d MOV %RCX,%RDI |
0xde10 MOVQ $0,(%R15,%R12,8) |
0xde18 MOV %RSI,%R13 |
0xde1b CMP %RDI,%RSI |
0xde1e MOV %RDI,-0x30(%RBP) |
0xde22 JGE deff |
0xde28 LEA (%R15,%R12,8),%RCX |
0xde2c LEA -0x8(%R8,%RDI,8),%RDI |
0xde31 CMP %RDX,%R12 |
0xde34 CMOVL %R12,%RDX |
0xde38 IMUL %R12,%RAX |
0xde3c LEA (%RDX,%RAX,1),%RSI |
0xde40 CMP %RCX,%RDI |
0xde43 JB de74 |
0xde45 LEA (%R8,%RSI,8),%RDI |
0xde49 CMP %RDI,%RCX |
0xde4c JB de74 |
0xde4e XOR %EAX,%EAX |
0xde50 MOV %R13,%RDX |
0xde53 MOV -0x30(%RBP),%RSI |
0xde57 NOPW (%RAX,%RAX,1) |
(216) 0xde60 ADD (%R8,%RDX,8),%RAX |
(216) 0xde64 MOV %RAX,(%RCX) |
(216) 0xde67 INC %RDX |
(216) 0xde6a CMP %RDX,%RSI |
(216) 0xde6d JNE de60 |
0xde6f JMP deff |
0xde74 MOV -0x30(%RBP),%R8 |
0xde78 SUB %RSI,%R8 |
0xde7b MOV %R8,%RSI |
0xde7e AND $-0x4,%RSI |
0xde82 JE ded6 |
0xde84 LEA -0x1(%RSI),%RDI |
0xde88 MOV -0x38(%RBP),%R9 |
0xde8c LEA (%R9,%R13,8),%R9 |
0xde90 VPXOR %XMM0,%XMM0,%XMM0 |
0xde94 XOR %R10D,%R10D |
0xde97 NOPW (%RAX,%RAX,1) |
(215) 0xdea0 VPADDQ (%R9,%R10,8),%YMM0,%YMM0 |
(215) 0xdea6 ADD $0x4,%R10 |
(215) 0xdeaa CMP %RDI,%R10 |
(215) 0xdead JBE dea0 |
0xdeaf VEXTRACTI128 $0x1,%YMM0,%XMM1 |
0xdeb5 VPADDQ %XMM1,%XMM0,%XMM0 |
0xdeb9 VPSHUFD $-0x12,%XMM0,%XMM1 |
0xdebe VPADDQ %XMM1,%XMM0,%XMM0 |
0xdec2 VMOVQ %XMM0,%RDI |
0xdec7 CMP %RSI,%R8 |
0xdeca MOV -0x38(%RBP),%R8 |
0xdece MOV -0x30(%RBP),%R9 |
0xded2 JNE dee2 |
0xded4 JMP defc |
0xded6 XOR %ESI,%ESI |
0xded8 XOR %EDI,%EDI |
0xdeda MOV -0x38(%RBP),%R8 |
0xdede MOV -0x30(%RBP),%R9 |
0xdee2 ADD %RDX,%RSI |
0xdee5 ADD %RAX,%RSI |
0xdee8 NOPL (%RAX,%RAX,1) |
(214) 0xdef0 ADD (%R8,%RSI,8),%RDI |
(214) 0xdef4 INC %RSI |
(214) 0xdef7 CMP %RSI,%R9 |
(214) 0xdefa JNE def0 |
0xdefc MOV %RDI,(%RCX) |
0xdeff MOV (%R14),%ESI |
0xdf02 LEA 0x208387(%RIP),%RDI |
0xdf09 VZEROUPPER |
0xdf0c CALL 2e20 <__kmpc_barrier@plt> |
0xdf11 TEST %R12,%R12 |
0xdf14 JNE dfb4 |
0xdf1a MOV 0xc8(%RBP),%RAX |
0xdf21 TEST %RAX,%RAX |
0xdf24 JLE dfb4 |
0xdf2a MOV (%R15),%RCX |
0xdf2d CMP $0x8,%RAX |
0xdf31 JB df87 |
0xdf33 MOV %RAX,%RDX |
0xdf36 SHR $0x3,%RDX |
0xdf3a LEA 0x40(%R15),%RSI |
0xdf3e XCHG %AX,%AX |
(213) 0xdf40 ADD -0x38(%RSI),%RCX |
(213) 0xdf44 MOV %RCX,-0x38(%RSI) |
(213) 0xdf48 ADD -0x30(%RSI),%RCX |
(213) 0xdf4c MOV %RCX,-0x30(%RSI) |
(213) 0xdf50 ADD -0x28(%RSI),%RCX |
(213) 0xdf54 MOV %RCX,-0x28(%RSI) |
(213) 0xdf58 ADD -0x20(%RSI),%RCX |
(213) 0xdf5c MOV %RCX,-0x20(%RSI) |
(213) 0xdf60 ADD -0x18(%RSI),%RCX |
(213) 0xdf64 MOV %RCX,-0x18(%RSI) |
(213) 0xdf68 ADD -0x10(%RSI),%RCX |
(213) 0xdf6c MOV %RCX,-0x10(%RSI) |
(213) 0xdf70 ADD -0x8(%RSI),%RCX |
(213) 0xdf74 MOV %RCX,-0x8(%RSI) |
(213) 0xdf78 ADD (%RSI),%RCX |
(213) 0xdf7b MOV %RCX,(%RSI) |
(213) 0xdf7e ADD $0x40,%RSI |
(213) 0xdf82 DEC %RDX |
(213) 0xdf85 JNE df40 |
0xdf87 MOV %RAX,%RDX |
0xdf8a AND $-0x8,%RDX |
0xdf8e CMP %RAX,%RDX |
0xdf91 JE dfb4 |
0xdf93 NOPW %CS:(%RAX,%RAX,1) |
(212) 0xdfa0 LEA (%R15,%RDX,8),%RSI |
(212) 0xdfa4 INC %RDX |
(212) 0xdfa7 ADD 0x8(%RSI),%RCX |
(212) 0xdfab MOV %RCX,0x8(%RSI) |
(212) 0xdfaf CMP %RDX,%RAX |
(212) 0xdfb2 JNE dfa0 |
0xdfb4 MOV (%R14),%ESI |
0xdfb7 LEA 0x2082f2(%RIP),%RDI |
0xdfbe CALL 2e20 <__kmpc_barrier@plt> |
0xdfc3 TEST %R12,%R12 |
0xdfc6 JE dfcf |
0xdfc8 MOV -0x8(%R15,%R12,8),%RCX |
0xdfcd JMP dfd1 |
0xdfcf XOR %ECX,%ECX |
0xdfd1 MOV -0x38(%RBP),%RDX |
0xdfd5 MOV %R13,%RSI |
0xdfd8 MOV -0x30(%RBP),%RDI |
0xdfdc CMP %RDI,%R13 |
0xdfdf JGE eb60 |
0xdfe5 MOV 0xb0(%RBP),%RAX |
0xdfec MOV 0x98(%RBP),%R8 |
0xdff3 LEA (%RAX,%R12,8),%R15 |
0xdff7 LEA -0x1(%R8),%RAX |
0xdffb SHR $0x1,%RAX |
0xdffe MOV %RAX,-0xa8(%RBP) |
0xe005 VPCMPEQD %YMM2,%YMM2,%YMM2 |
0xe009 MOV 0x20(%RBP),%R8 |
0xe00d MOVQ $0,-0x60(%RBP) |
0xe015 JMP e040 |
0xe017 NOPW (%RAX,%RAX,1) |
(190) 0xe020 MOV -0x38(%RBP),%RDX |
(190) 0xe024 MOV -0x48(%RBP),%RSI |
(190) 0xe028 MOV -0x30(%RBP),%RDI |
(190) 0xe02c MOV -0x78(%RBP),%RCX |
(190) 0xe030 MOV 0x20(%RBP),%R8 |
(190) 0xe034 INC %RSI |
(190) 0xe037 CMP %RDI,%RSI |
(190) 0xe03a JGE eb60 |
(190) 0xe040 MOV -0xa0(%RBP),%RAX |
(190) 0xe047 MOV (%RAX,%RSI,8),%R9 |
(190) 0xe04b MOV (%RDX,%RSI,8),%R12 |
(190) 0xe04f MOV %R9,-0x70(%RBP) |
(190) 0xe053 SUB (%R8),%R9 |
(190) 0xe056 MOV %R12,-0x40(%RBP) |
(190) 0xe05a JL e0f0 |
(190) 0xe060 MOV -0x70(%RBP),%RAX |
(190) 0xe064 CMP 0x8(%R8),%RAX |
(190) 0xe068 JGE e0f0 |
(190) 0xe06e CMPQ $0,0x58(%RBP) |
(190) 0xe073 MOV %R9,-0x80(%RBP) |
(190) 0xe077 JE e2ef |
(190) 0xe07d MOV 0x38(%RBP),%RAX |
(190) 0xe081 MOV %R12,%R14 |
(190) 0xe084 MOV (%RAX,%R9,8),%R12 |
(190) 0xe088 MOV 0x40(%RBP),%RAX |
(190) 0xe08c MOV (%RAX,%R9,8),%RAX |
(190) 0xe090 MOV %RAX,-0x50(%RBP) |
(190) 0xe094 MOV 0x50(%RBP),%RAX |
(190) 0xe098 MOV (%RAX,%R9,8),%RDX |
(190) 0xe09c MOV 0x48(%RBP),%RAX |
(190) 0xe0a0 MOV (%RAX,%R9,8),%R13 |
(190) 0xe0a4 MOV %RDX,-0x88(%RBP) |
(190) 0xe0ab MOV %RDX,%RAX |
(190) 0xe0ae SUB %R13,%RAX |
(190) 0xe0b1 SUB %RAX,%R14 |
(190) 0xe0b4 MOV %RSI,-0x48(%RBP) |
(190) 0xe0b8 MOV %RCX,-0x78(%RBP) |
(190) 0xe0bc JLE e359 |
(190) 0xe0c2 MOV $0x8,%ESI |
(190) 0xe0c7 MOV %R14,%RDI |
(190) 0xe0ca VZEROUPPER |
(190) 0xe0cd CALL 2df0 <hypre_CAlloc@plt> |
(190) 0xe0d2 MOV %RAX,-0x58(%RBP) |
(190) 0xe0d6 MOV $0x8,%ESI |
(190) 0xe0db MOV %R14,%RDI |
(190) 0xe0de CALL 2df0 <hypre_CAlloc@plt> |
(190) 0xe0e3 MOV -0x58(%RBP),%R11 |
(190) 0xe0e7 MOV %RAX,-0x60(%RBP) |
(190) 0xe0eb JMP e35c |
(190) 0xe0f0 ADD %R12,%RCX |
(190) 0xe0f3 MOV 0x18(%RBP),%RAX |
(190) 0xe0f7 CMPQ $0,(%RAX) |
(190) 0xe0fb JE e034 |
(190) 0xe101 CMPQ $0,0x98(%RBP) |
(190) 0xe109 JLE e034 |
(190) 0xe10f MOV %RCX,-0x78(%RBP) |
(190) 0xe113 MOV %RSI,-0x48(%RBP) |
(190) 0xe117 LEA -0x1(%R12),%RAX |
(190) 0xe11c LEA -0x8(%RBX,%R12,8),%RCX |
(190) 0xe121 XOR %ESI,%ESI |
(190) 0xe123 XOR %EDX,%EDX |
(190) 0xe125 JMP e144 |
0xe127 NOPW (%RAX,%RAX,1) |
(191) 0xe130 LEA 0x1(%RDX),%RDI |
(191) 0xe134 CMP -0xa8(%RBP),%RDX |
(191) 0xe13b MOV %RDI,%RDX |
(191) 0xe13e JE e020 |
(191) 0xe144 MOV %RSI,%R8 |
(191) 0xe147 MOV %RDX,%R9 |
(191) 0xe14a SAL $0x4,%R9 |
(191) 0xe14e MOV 0xa0(%RBP),%R10 |
(191) 0xe155 MOV 0x8(%R10,%R9,1),%RDI |
(191) 0xe15a ADD %RDI,%RSI |
(191) 0xe15d MOV -0x70(%RBP),%R11 |
(191) 0xe161 CMP %R11,(%R10,%R9,1) |
(191) 0xe165 JNE e130 |
(191) 0xe167 TEST %R12,%R12 |
(191) 0xe16a JLE e130 |
(191) 0xe16c MOV 0xa8(%RBP),%R10 |
(191) 0xe173 LEA -0x8(%R10,%RSI,8),%R9 |
(191) 0xe178 CMP %R15,%R9 |
(191) 0xe17b SETAE %R13B |
(191) 0xe17f LEA (%R10,%R8,8),%R8 |
(191) 0xe183 CMP %R8,%R15 |
(191) 0xe186 SETAE %R12B |
(191) 0xe18a CMP %RBX,%R9 |
(191) 0xe18d SETB %R10B |
(191) 0xe191 CMP %R8,%RCX |
(191) 0xe194 SETB %R14B |
(191) 0xe198 CMP %R15,%RCX |
(191) 0xe19b SETB %R9B |
(191) 0xe19f CMP %RBX,%R15 |
(191) 0xe1a2 SETB %R11B |
(191) 0xe1a6 TEST %R12B,%R13B |
(191) 0xe1a9 JNE e2a0 |
(191) 0xe1af OR %R14B,%R10B |
(191) 0xe1b2 JE e2a0 |
(191) 0xe1b8 OR %R11B,%R9B |
(191) 0xe1bb JE e2a0 |
(191) 0xe1c1 XOR %R9D,%R9D |
(191) 0xe1c4 MOV -0x40(%RBP),%R12 |
(191) 0xe1c8 JMP e1e0 |
0xe1ca NOPW (%RAX,%RAX,1) |
(194) 0xe1d0 LEA 0x1(%R9),%R10 |
(194) 0xe1d4 CMP %RAX,%R9 |
(194) 0xe1d7 MOV %R10,%R9 |
(194) 0xe1da JE e130 |
(194) 0xe1e0 TEST %RDI,%RDI |
(194) 0xe1e3 JLE e1d0 |
(194) 0xe1e5 MOV (%RBX,%R9,8),%R10 |
(194) 0xe1e9 MOV %RDI,%R11 |
(194) 0xe1ec AND $-0x4,%R11 |
(194) 0xe1f0 JE e270 |
(194) 0xe1f2 LEA -0x1(%R11),%R14 |
(194) 0xe1f6 VPBROADCASTQ %R10,%YMM1 |
(194) 0xe1fc VPXOR %XMM0,%XMM0,%XMM0 |
(194) 0xe200 XOR %R13D,%R13D |
(194) 0xe203 NOPW %CS:(%RAX,%RAX,1) |
(197) 0xe210 VPCMPEQQ (%R8,%R13,8),%YMM1,%K1 |
(197) 0xe217 VMOVDQU64 %YMM2,(%R8,%R13,8){%K1} |
(197) 0xe21e VPSUBQ %YMM2,%YMM0,%YMM0{%K1} |
(197) 0xe224 ADD $0x4,%R13 |
(197) 0xe228 CMP %R14,%R13 |
(197) 0xe22b JLE e210 |
(194) 0xe22d VEXTRACTI128 $0x1,%YMM0,%XMM1 |
(194) 0xe233 VPADDQ %XMM1,%XMM0,%XMM0 |
(194) 0xe237 VPSHUFD $-0x12,%XMM0,%XMM1 |
(194) 0xe23c VPADDQ %XMM1,%XMM0,%XMM0 |
(194) 0xe240 VMOVQ %XMM0,%R14 |
(194) 0xe245 CMP %R11,%RDI |
(194) 0xe248 JNE e280 |
(194) 0xe24a NOPW (%RAX,%RAX,1) |
(194) 0xe250 TEST %R14,%R14 |
(194) 0xe253 JE e1d0 |
(194) 0xe259 ADD %R14,(%R15) |
(194) 0xe25c JMP e1d0 |
0xe261 NOPW %CS:(%RAX,%RAX,1) |
(194) 0xe270 XOR %R14D,%R14D |
(194) 0xe273 XOR %R11D,%R11D |
(194) 0xe276 CMP %R10,(%R8,%R11,8) |
(194) 0xe27a JE e286 |
(194) 0xe27c JMP e291 |
0xe27e XCHG %AX,%AX |
(195) 0xe280 CMP %R10,(%R8,%R11,8) |
(195) 0xe284 JNE e291 |
(196) 0xe286 MOVQ $-0x1,(%R8,%R11,8) |
(196) 0xe28e INC %R14 |
(195) 0xe291 INC %R11 |
(195) 0xe294 CMP %R11,%RDI |
(195) 0xe297 JNE e280 |
(194) 0xe299 JMP e250 |
0xe29b NOPL (%RAX,%RAX,1) |
(191) 0xe2a0 XOR %R9D,%R9D |
(191) 0xe2a3 MOV -0x40(%RBP),%R12 |
(191) 0xe2a7 JMP e2c0 |
0xe2a9 NOPL (%RAX) |
(192) 0xe2b0 LEA 0x1(%R9),%R10 |
(192) 0xe2b4 CMP %RAX,%R9 |
(192) 0xe2b7 MOV %R10,%R9 |
(192) 0xe2ba JE e130 |
(192) 0xe2c0 TEST %RDI,%RDI |
(192) 0xe2c3 JLE e2b0 |
(192) 0xe2c5 XOR %R10D,%R10D |
(192) 0xe2c8 JMP e2d8 |
0xe2ca NOPW (%RAX,%RAX,1) |
(193) 0xe2d0 INC %R10 |
(193) 0xe2d3 CMP %R10,%RDI |
(193) 0xe2d6 JE e2b0 |
(193) 0xe2d8 MOV (%R8,%R10,8),%R11 |
(193) 0xe2dc CMP (%RBX,%R9,8),%R11 |
(193) 0xe2e0 JNE e2d0 |
(193) 0xe2e2 MOVQ $-0x1,(%R8,%R10,8) |
(193) 0xe2ea INCQ (%R15) |
(193) 0xe2ed JMP e2d0 |
(190) 0xe2ef MOV %RCX,%R13 |
(190) 0xe2f2 MOV 0x18(%RBP),%RAX |
(190) 0xe2f6 MOV (%RAX),%RAX |
(190) 0xe2f9 MOV 0x38(%RAX),%RCX |
(190) 0xe2fd MOV 0x40(%RAX),%RAX |
(190) 0xe301 MOV (%RAX,%R9,8),%R10 |
(190) 0xe305 MOV (%RCX,%R9,8),%R14 |
(190) 0xe309 TEST %R12,%R12 |
(190) 0xe30c JLE eaa9 |
(190) 0xe312 MOV %RSI,-0x48(%RBP) |
(190) 0xe316 MOV 0x60(%RBP),%RAX |
(190) 0xe31a MOV 0x8(%RAX,%R9,8),%RAX |
(190) 0xe31f MOV %RAX,-0x58(%RBP) |
(190) 0xe323 MOV 0x78(%RBP),%RAX |
(190) 0xe327 MOV 0x8(%RAX,%R9,8),%RAX |
(190) 0xe32c MOV %RAX,-0x90(%RBP) |
(190) 0xe333 MOV %R13,%RCX |
(190) 0xe336 LEA (%R13,%R12,1),%RAX |
(190) 0xe33b MOV %RAX,-0x98(%RBP) |
(190) 0xe342 XOR %R8D,%R8D |
(190) 0xe345 MOV %R10,-0x68(%RBP) |
(190) 0xe349 MOV %R10,-0x50(%RBP) |
(190) 0xe34d MOV %R14,-0x88(%RBP) |
(190) 0xe354 JMP e6c0 |
(190) 0xe359 XOR %R11D,%R11D |
(190) 0xe35c MOV -0x40(%RBP),%RAX |
(190) 0xe360 TEST %RAX,%RAX |
(190) 0xe363 JLE e650 |
(190) 0xe369 DEC %RAX |
(190) 0xe36c MOV %RAX,-0x70(%RBP) |
(190) 0xe370 MOV %R12D,%EAX |
(190) 0xe373 AND $0x7f,%EAX |
(190) 0xe376 MOV $0x80,%ECX |
(190) 0xe37b SUB %EAX,%ECX |
(190) 0xe37d SHR $0x3,%ECX |
(190) 0xe380 CMP %RCX,%R13 |
(190) 0xe383 CMOVB %R13,%RCX |
(190) 0xe387 MOV %ECX,%EAX |
(190) 0xe389 LEA (%R12,%RAX,8),%RDX |
(190) 0xe38d MOV %R13,%RAX |
(190) 0xe390 SUB %RCX,%RAX |
(190) 0xe393 AND $-0x10,%RAX |
(190) 0xe397 ADD %RCX,%RAX |
(190) 0xe39a MOV %RAX,-0x90(%RBP) |
(190) 0xe3a1 MOVQ $0,-0x68(%RBP) |
(190) 0xe3a9 XOR %EDI,%EDI |
(190) 0xe3ab MOV %R13,%R14 |
(190) 0xe3ae MOV %R11,-0x58(%RBP) |
(190) 0xe3b2 JMP e3f3 |
0xe3b4 NOPW %CS:(%RAX,%RAX,1) |
(208) 0xe3c0 MOV -0x68(%RBP),%R8 |
(208) 0xe3c4 MOV %RSI,(%R11,%R8,8) |
(208) 0xe3c8 MOV 0x10(%RBP),%RSI |
(208) 0xe3cc VMOVQ (%RSI,%RAX,8),%XMM0 |
(208) 0xe3d1 MOV -0x60(%RBP),%RAX |
(208) 0xe3d5 VMOVQ %XMM0,(%RAX,%R8,8) |
(208) 0xe3db INC %R8 |
(208) 0xe3de MOV %R8,-0x68(%RBP) |
(208) 0xe3e2 LEA 0x1(%RDI),%RAX |
(208) 0xe3e6 CMP -0x70(%RBP),%RDI |
(208) 0xe3ea MOV %RAX,%RDI |
(208) 0xe3ed JE e54a |
(208) 0xe3f3 TEST %R13,%R13 |
(208) 0xe3f6 JLE e4d0 |
(208) 0xe3fc MOV -0x78(%RBP),%RAX |
(208) 0xe400 LEA (%RAX,%RDI,1),%R8 |
(208) 0xe404 MOV (%RBX,%R8,8),%R9 |
(208) 0xe408 MOV %R12D,%R10D |
(208) 0xe40b AND $0x7f,%R10D |
(208) 0xe40f MOV $0x80,%ESI |
(208) 0xe414 SUB %R10D,%ESI |
(208) 0xe417 SHR $0x3,%ESI |
(208) 0xe41a CMP %RSI,%R13 |
(208) 0xe41d MOV %RSI,%RAX |
(208) 0xe420 CMOVB %R13,%RAX |
(208) 0xe424 CMP $0x78,%R10 |
(208) 0xe428 JA e442 |
(208) 0xe42a XOR %R10D,%R10D |
(208) 0xe42d NOPL (%RAX) |
(211) 0xe430 CMP %R9,(%R12,%R10,8) |
(211) 0xe434 JE e531 |
(211) 0xe43a INC %R10 |
(211) 0xe43d CMP %R10,%RCX |
(211) 0xe440 JNE e430 |
(208) 0xe442 CMP %R13,%RSI |
(208) 0xe445 JAE e4d0 |
(208) 0xe44b MOV %R13,%R11 |
(208) 0xe44e SUB %RAX,%R11 |
(208) 0xe451 MOV %R11,%RSI |
(208) 0xe454 AND $-0x10,%RSI |
(208) 0xe458 JE e4a6 |
(208) 0xe45a LEA -0x1(%RSI),%RAX |
(208) 0xe45e VPBROADCASTQ %R9,%YMM0 |
(208) 0xe464 XOR %R10D,%R10D |
(208) 0xe467 NOPW (%RAX,%RAX,1) |
(210) 0xe470 VPCMPEQQ 0x20(%RDX,%R10,8),%YMM0,%K0 |
(210) 0xe478 VPCMPEQQ (%RDX,%R10,8),%YMM0,%K1 |
(210) 0xe47f VPCMPEQQ 0x60(%RDX,%R10,8),%YMM0,%K2 |
(210) 0xe487 VPCMPEQQ 0x40(%RDX,%R10,8),%YMM0,%K3 |
(210) 0xe48f KORB %K0,%K1,%K4 |
(210) 0xe493 KORB %K2,%K3,%K5 |
(210) 0xe497 KORTESTB %K5,%K4 |
(210) 0xe49b JNE e507 |
(210) 0xe49d ADD $0x10,%R10 |
(210) 0xe4a1 CMP %RAX,%R10 |
(210) 0xe4a4 JBE e470 |
(208) 0xe4a6 CMP %R11,%RSI |
(208) 0xe4a9 MOV -0x58(%RBP),%R11 |
(208) 0xe4ad JE e4d0 |
(208) 0xe4af MOV -0x90(%RBP),%R10 |
(208) 0xe4b6 NOPW %CS:(%RAX,%RAX,1) |
(209) 0xe4c0 CMP %R9,(%R12,%R10,8) |
(209) 0xe4c4 JE e531 |
(209) 0xe4c6 INC %R10 |
(209) 0xe4c9 CMP %R10,%R13 |
(209) 0xe4cc JNE e4c0 |
(208) 0xe4ce XCHG %AX,%AX |
(208) 0xe4d0 MOV -0x78(%RBP),%RAX |
(208) 0xe4d4 ADD %RDI,%RAX |
(208) 0xe4d7 MOV (%RBX,%RAX,8),%RSI |
(208) 0xe4db CMP -0x88(%RBP),%R14 |
(208) 0xe4e2 JGE e3c0 |
(208) 0xe4e8 MOV %RSI,(%R12,%R14,8) |
(208) 0xe4ec MOV 0x10(%RBP),%RSI |
(208) 0xe4f0 VMOVQ (%RSI,%RAX,8),%XMM0 |
(208) 0xe4f5 MOV -0x50(%RBP),%RAX |
(208) 0xe4f9 VMOVQ %XMM0,(%RAX,%R14,8) |
(208) 0xe4ff INC %R14 |
(208) 0xe502 JMP e3e2 |
(208) 0xe507 KSHIFTLB $0x4,%K0,%K0 |
(208) 0xe50d KORB %K0,%K1,%K0 |
(208) 0xe511 KSHIFTLB $0x4,%K2,%K1 |
(208) 0xe517 KORB %K1,%K3,%K1 |
(208) 0xe51b KUNPCKBW %K0,%K1,%K0 |
(208) 0xe51f KMOVD %K0,%EAX |
(208) 0xe523 TZCNT %EAX,%EAX |
(208) 0xe527 ADD %RCX,%R10 |
(208) 0xe52a ADD %RAX,%R10 |
(208) 0xe52d MOV -0x58(%RBP),%R11 |
(208) 0xe531 MOV 0x10(%RBP),%RAX |
(208) 0xe535 VMOVQ (%RAX,%R8,8),%XMM0 |
(208) 0xe53b MOV -0x50(%RBP),%RAX |
(208) 0xe53f VMOVQ %XMM0,(%RAX,%R10,8) |
(208) 0xe545 JMP e3e2 |
(190) 0xe54a MOV -0x40(%RBP),%RAX |
(190) 0xe54e ADD %RAX,-0x78(%RBP) |
(190) 0xe552 MOV -0x68(%RBP),%R13 |
(190) 0xe556 LEA (%R14,%R13,1),%RCX |
(190) 0xe55a MOV 0x48(%RBP),%RAX |
(190) 0xe55e MOV -0x80(%RBP),%R12 |
(190) 0xe562 MOV %RCX,(%RAX,%R12,8) |
(190) 0xe566 TEST %R13,%R13 |
(190) 0xe569 JE e65c |
(190) 0xe56f MOV 0x38(%RBP),%RAX |
(190) 0xe573 MOV (%RAX,%R12,8),%RDI |
(190) 0xe577 LEA (,%RCX,8),%RSI |
(190) 0xe57f MOV %RSI,-0x40(%RBP) |
(190) 0xe583 MOV %RCX,-0x70(%RBP) |
(190) 0xe587 VZEROUPPER |
(190) 0xe58a CALL 2b70 <hypre_ReAlloc@plt> |
(190) 0xe58f MOV 0x38(%RBP),%RCX |
(190) 0xe593 MOV %RAX,(%RCX,%R12,8) |
(190) 0xe597 MOV 0x40(%RBP),%RAX |
(190) 0xe59b MOV (%RAX,%R12,8),%RDI |
(190) 0xe59f MOV -0x40(%RBP),%RSI |
(190) 0xe5a3 CALL 2b70 <hypre_ReAlloc@plt> |
(190) 0xe5a8 MOV -0x70(%RBP),%R9 |
(190) 0xe5ac MOV 0x40(%RBP),%RCX |
(190) 0xe5b0 MOV %RAX,(%RCX,%R12,8) |
(190) 0xe5b4 MOV 0x50(%RBP),%RCX |
(190) 0xe5b8 MOV %R9,(%RCX,%R12,8) |
(190) 0xe5bc TEST %R13,%R13 |
(190) 0xe5bf JLE e9d4 |
(190) 0xe5c5 MOV 0x38(%RBP),%RCX |
(190) 0xe5c9 MOV (%RCX,%R12,8),%RCX |
(190) 0xe5cd MOV -0x58(%RBP),%R11 |
(190) 0xe5d1 LEA -0x8(%R11,%R13,8),%RDX |
(190) 0xe5d6 LEA (%RCX,%R14,8),%RDI |
(190) 0xe5da CMP %RDI,%RDX |
(190) 0xe5dd SETAE %DL |
(190) 0xe5e0 LEA -0x8(%RCX,%R9,8),%RCX |
(190) 0xe5e5 CMP %R11,%RCX |
(190) 0xe5e8 SETAE %SIL |
(190) 0xe5ec MOV -0x60(%RBP),%R8 |
(190) 0xe5f0 LEA -0x8(%R8,%R13,8),%RCX |
(190) 0xe5f5 LEA (%RAX,%R14,8),%R12 |
(190) 0xe5f9 CMP %R12,%RCX |
(190) 0xe5fc SETB %CL |
(190) 0xe5ff LEA -0x8(%RAX,%R9,8),%RAX |
(190) 0xe604 CMP %R8,%RAX |
(190) 0xe607 SETB %AL |
(190) 0xe60a TEST %SIL,%DL |
(190) 0xe60d JNE e9e6 |
(190) 0xe613 OR %AL,%CL |
(190) 0xe615 JE e9e6 |
(190) 0xe61b MOV -0x68(%RBP),%R14 |
(190) 0xe61f CMP $0xd,%R14 |
(190) 0xe623 JB eacc |
(190) 0xe629 SAL $0x3,%R14 |
(190) 0xe62d MOV %R11,%RSI |
(190) 0xe630 MOV %R14,%RDX |
(190) 0xe633 CALL 2de0 <__intel_avx_rep_memcpy@plt> |
(190) 0xe638 MOV %R12,%RDI |
(190) 0xe63b MOV -0x60(%RBP),%RSI |
(190) 0xe63f MOV %R14,%RDX |
(190) 0xe642 CALL 2de0 <__intel_avx_rep_memcpy@plt> |
(190) 0xe647 MOV -0x58(%RBP),%R11 |
(190) 0xe64b JMP eb2b |
(190) 0xe650 MOV 0x48(%RBP),%RAX |
(190) 0xe654 MOV -0x80(%RBP),%RCX |
(190) 0xe658 MOV %R13,(%RAX,%RCX,8) |
(190) 0xe65c TEST %R11,%R11 |
(190) 0xe65f JNE eb2b |
(190) 0xe665 JMP eb47 |
(198) 0xe66a KSHIFTLB $0x4,%K0,%K0 |
(198) 0xe670 KORB %K0,%K1,%K0 |
(198) 0xe674 KSHIFTLB $0x4,%K2,%K1 |
(198) 0xe67a KORB %K1,%K3,%K1 |
(198) 0xe67e KUNPCKBW %K0,%K1,%K0 |
(198) 0xe682 KMOVD %K0,%EAX |
(198) 0xe686 TZCNT %EAX,%EAX |
(198) 0xe68a ADD %RDI,%R13 |
(198) 0xe68d ADD %RAX,%R13 |
(198) 0xe690 MOV -0x30(%RBP),%RDI |
(198) 0xe694 MOV %R12,%RCX |
(198) 0xe697 MOV -0x40(%RBP),%R12 |
(198) 0xe69b MOV 0x10(%RBP),%RAX |
(198) 0xe69f VMOVQ (%RAX,%RCX,8),%XMM0 |
(198) 0xe6a4 MOV 0x88(%RBP),%RAX |
(198) 0xe6ab VMOVQ %XMM0,(%RAX,%R13,8) |
(198) 0xe6b1 INC %RCX |
(198) 0xe6b4 INC %R8 |
(198) 0xe6b7 CMP %R12,%R8 |
(198) 0xe6ba JE e9c0 |
(198) 0xe6c0 MOV (%RBX,%RCX,8),%R9 |
(198) 0xe6c4 CMP 0x28(%RBP),%R9 |
(198) 0xe6c8 JL e820 |
(198) 0xe6ce CMP 0x30(%RBP),%R9 |
(198) 0xe6d2 JG e820 |
(198) 0xe6d8 MOV 0x60(%RBP),%RAX |
(198) 0xe6dc MOV -0x80(%RBP),%RDX |
(198) 0xe6e0 MOV (%RAX,%RDX,8),%R10 |
(198) 0xe6e4 MOV -0x88(%RBP),%R11 |
(198) 0xe6eb SUB %R10,%R11 |
(198) 0xe6ee JLE e7f0 |
(198) 0xe6f4 MOV 0x68(%RBP),%RAX |
(198) 0xe6f8 LEA (%RAX,%R10,8),%EAX |
(198) 0xe6fc AND $0x7f,%EAX |
(198) 0xe6ff MOV $0x80,%EDX |
(198) 0xe704 SUB %EAX,%EDX |
(198) 0xe706 SHR $0x3,%EDX |
(198) 0xe709 CMP %RDX,%R11 |
(198) 0xe70c MOV %RDX,%RSI |
(198) 0xe70f CMOVB %R11,%RSI |
(198) 0xe713 TEST %RSI,%RSI |
(198) 0xe716 JE e736 |
(198) 0xe718 MOV %R10,%R13 |
(198) 0xe71b MOV %RSI,%RAX |
(198) 0xe71e XCHG %AX,%AX |
(204) 0xe720 MOV 0x68(%RBP),%R12 |
(204) 0xe724 CMP %R9,(%R12,%R13,8) |
(204) 0xe728 JE e9aa |
(204) 0xe72e INC %R13 |
(204) 0xe731 DEC %RAX |
(204) 0xe734 JNE e720 |
(198) 0xe736 CMP %R11,%RDX |
(198) 0xe739 MOV -0x40(%RBP),%R12 |
(198) 0xe73d JAE e7f0 |
(198) 0xe743 MOV %RCX,%R12 |
(198) 0xe746 SUB %RSI,%R11 |
(198) 0xe749 MOV %R11,%RDX |
(198) 0xe74c AND $-0x10,%RDX |
(198) 0xe750 JE e7aa |
(198) 0xe752 LEA -0x1(%RDX),%RAX |
(198) 0xe756 VPBROADCASTQ %R9,%YMM0 |
(198) 0xe75c LEA (%R10,%RSI,1),%R13 |
(198) 0xe760 MOV 0x68(%RBP),%RCX |
(198) 0xe764 LEA (%RCX,%R13,8),%RCX |
(198) 0xe768 XOR %EDI,%EDI |
(198) 0xe76a NOPW (%RAX,%RAX,1) |
(203) 0xe770 VPCMPEQQ 0x20(%RCX,%RDI,8),%YMM0,%K0 |
(203) 0xe778 VPCMPEQQ (%RCX,%RDI,8),%YMM0,%K1 |
(203) 0xe77f VPCMPEQQ 0x60(%RCX,%RDI,8),%YMM0,%K2 |
(203) 0xe787 VPCMPEQQ 0x40(%RCX,%RDI,8),%YMM0,%K3 |
(203) 0xe78f KORB %K0,%K1,%K4 |
(203) 0xe793 KORB %K2,%K3,%K5 |
(203) 0xe797 KORTESTB %K5,%K4 |
(203) 0xe79b JNE e97d |
(203) 0xe7a1 ADD $0x10,%RDI |
(203) 0xe7a5 CMP %RAX,%RDI |
(203) 0xe7a8 JBE e770 |
(198) 0xe7aa CMP %R11,%RDX |
(198) 0xe7ad MOV -0x30(%RBP),%RDI |
(198) 0xe7b1 MOV %R12,%RCX |
(198) 0xe7b4 MOV -0x40(%RBP),%R12 |
(198) 0xe7b8 JE e7f0 |
(198) 0xe7ba ADD %RSI,%R10 |
(198) 0xe7bd ADD %RDX,%R10 |
(198) 0xe7c0 MOV %R10,%R13 |
(198) 0xe7c3 NOPW %CS:(%RAX,%RAX,1) |
(202) 0xe7d0 MOV 0x68(%RBP),%RAX |
(202) 0xe7d4 CMP %R9,(%RAX,%R13,8) |
(202) 0xe7d8 JE e9ae |
(202) 0xe7de INC %R13 |
(202) 0xe7e1 CMP %R13,-0x88(%RBP) |
(202) 0xe7e8 JNE e7d0 |
(198) 0xe7ea NOPW (%RAX,%RAX,1) |
(198) 0xe7f0 CMP -0x58(%RBP),%R14 |
(198) 0xe7f4 JGE ea4c |
(198) 0xe7fa MOV 0x68(%RBP),%RAX |
(198) 0xe7fe MOV %R9,(%RAX,%R14,8) |
(198) 0xe802 MOV 0x10(%RBP),%RAX |
(198) 0xe806 VMOVQ (%RAX,%RCX,8),%XMM0 |
(198) 0xe80b MOV 0x70(%RBP),%RAX |
(198) 0xe80f VMOVQ %XMM0,(%RAX,%R14,8) |
(198) 0xe815 INC %R14 |
(198) 0xe818 JMP e6b1 |
0xe81d NOPL (%RAX) |
(198) 0xe820 MOV 0x78(%RBP),%RAX |
(198) 0xe824 MOV -0x80(%RBP),%RDX |
(198) 0xe828 MOV (%RAX,%RDX,8),%R10 |
(198) 0xe82c MOV -0x68(%RBP),%R11 |
(198) 0xe830 SUB %R10,%R11 |
(198) 0xe833 JLE e940 |
(198) 0xe839 MOV 0x80(%RBP),%RAX |
(198) 0xe840 LEA (%RAX,%R10,8),%EAX |
(198) 0xe844 AND $0x7f,%EAX |
(198) 0xe847 MOV $0x80,%EDX |
(198) 0xe84c SUB %EAX,%EDX |
(198) 0xe84e SHR $0x3,%EDX |
(198) 0xe851 CMP %RDX,%R11 |
(198) 0xe854 MOV %RDX,%RSI |
(198) 0xe857 CMOVB %R11,%RSI |
(198) 0xe85b TEST %RSI,%RSI |
(198) 0xe85e JE e889 |
(198) 0xe860 MOV %R10,%R13 |
(198) 0xe863 MOV %RSI,%RAX |
(198) 0xe866 NOPW %CS:(%RAX,%RAX,1) |
(201) 0xe870 MOV 0x80(%RBP),%R12 |
(201) 0xe877 CMP %R9,(%R12,%R13,8) |
(201) 0xe87b JE e697 |
(201) 0xe881 INC %R13 |
(201) 0xe884 DEC %RAX |
(201) 0xe887 JNE e870 |
(198) 0xe889 CMP %R11,%RDX |
(198) 0xe88c MOV -0x40(%RBP),%R12 |
(198) 0xe890 JAE e940 |
(198) 0xe896 MOV %RCX,%R12 |
(198) 0xe899 SUB %RSI,%R11 |
(198) 0xe89c MOV %R11,%RDX |
(198) 0xe89f AND $-0x10,%RDX |
(198) 0xe8a3 JE e8fa |
(198) 0xe8a5 LEA -0x1(%RDX),%RAX |
(198) 0xe8a9 VPBROADCASTQ %R9,%YMM0 |
(198) 0xe8af LEA (%R10,%RSI,1),%R13 |
(198) 0xe8b3 MOV 0x80(%RBP),%RCX |
(198) 0xe8ba LEA (%RCX,%R13,8),%RCX |
(198) 0xe8be XOR %EDI,%EDI |
(200) 0xe8c0 VPCMPEQQ 0x20(%RCX,%RDI,8),%YMM0,%K0 |
(200) 0xe8c8 VPCMPEQQ (%RCX,%RDI,8),%YMM0,%K1 |
(200) 0xe8cf VPCMPEQQ 0x60(%RCX,%RDI,8),%YMM0,%K2 |
(200) 0xe8d7 VPCMPEQQ 0x40(%RCX,%RDI,8),%YMM0,%K3 |
(200) 0xe8df KORB %K0,%K1,%K4 |
(200) 0xe8e3 KORB %K2,%K3,%K5 |
(200) 0xe8e7 KORTESTB %K5,%K4 |
(200) 0xe8eb JNE e66a |
(200) 0xe8f1 ADD $0x10,%RDI |
(200) 0xe8f5 CMP %RAX,%RDI |
(200) 0xe8f8 JBE e8c0 |
(198) 0xe8fa CMP %R11,%RDX |
(198) 0xe8fd MOV -0x30(%RBP),%RDI |
(198) 0xe901 MOV %R12,%RCX |
(198) 0xe904 MOV -0x40(%RBP),%R12 |
(198) 0xe908 JE e940 |
(198) 0xe90a ADD %RSI,%R10 |
(198) 0xe90d ADD %RDX,%R10 |
(198) 0xe910 MOV %R10,%R13 |
(198) 0xe913 NOPW %CS:(%RAX,%RAX,1) |
(199) 0xe920 MOV 0x80(%RBP),%RAX |
(199) 0xe927 CMP %R9,(%RAX,%R13,8) |
(199) 0xe92b JE e69b |
(199) 0xe931 INC %R13 |
(199) 0xe934 CMP %R13,-0x68(%RBP) |
(199) 0xe938 JNE e920 |
(198) 0xe93a NOPW (%RAX,%RAX,1) |
(198) 0xe940 MOV -0x50(%RBP),%RDX |
(198) 0xe944 CMP -0x90(%RBP),%RDX |
(198) 0xe94b JGE ea10 |
(198) 0xe951 MOV 0x80(%RBP),%RAX |
(198) 0xe958 MOV %R9,(%RAX,%RDX,8) |
(198) 0xe95c MOV 0x10(%RBP),%RAX |
(198) 0xe960 VMOVQ (%RAX,%RCX,8),%XMM0 |
(198) 0xe965 MOV 0x88(%RBP),%RAX |
(198) 0xe96c VMOVQ %XMM0,(%RAX,%RDX,8) |
(198) 0xe971 INC %RDX |
(198) 0xe974 MOV %RDX,-0x50(%RBP) |
(198) 0xe978 JMP e6b1 |
(198) 0xe97d KSHIFTLB $0x4,%K0,%K0 |
(198) 0xe983 KORB %K0,%K1,%K0 |
(198) 0xe987 KSHIFTLB $0x4,%K2,%K1 |
(198) 0xe98d KORB %K1,%K3,%K1 |
(198) 0xe991 KUNPCKBW %K0,%K1,%K0 |
(198) 0xe995 KMOVD %K0,%EAX |
(198) 0xe999 TZCNT %EAX,%EAX |
(198) 0xe99d ADD %RDI,%R13 |
(198) 0xe9a0 ADD %RAX,%R13 |
(198) 0xe9a3 MOV -0x30(%RBP),%RDI |
(198) 0xe9a7 MOV %R12,%RCX |
(198) 0xe9aa MOV -0x40(%RBP),%R12 |
(198) 0xe9ae MOV 0x10(%RBP),%RAX |
(198) 0xe9b2 VMOVQ (%RAX,%RCX,8),%XMM0 |
(198) 0xe9b7 MOV 0x70(%RBP),%RAX |
(198) 0xe9bb JMP e6ab |
(190) 0xe9c0 MOV -0x98(%RBP),%R13 |
(190) 0xe9c7 MOV -0x38(%RBP),%RDX |
(190) 0xe9cb MOV -0x48(%RBP),%RSI |
(190) 0xe9cf JMP eaa1 |
(190) 0xe9d4 MOV -0x58(%RBP),%R11 |
(190) 0xe9d8 TEST %R11,%R11 |
(190) 0xe9db JNE eb2b |
(190) 0xe9e1 JMP eb47 |
(190) 0xe9e6 XOR %EAX,%EAX |
(190) 0xe9e8 MOV -0x60(%RBP),%RDX |
(190) 0xe9ec MOV -0x68(%RBP),%RSI |
(205) 0xe9f0 MOV (%R11,%RAX,8),%RCX |
(205) 0xe9f4 MOV %RCX,(%RDI,%RAX,8) |
(205) 0xe9f8 VMOVQ (%RDX,%RAX,8),%XMM0 |
(205) 0xe9fd VMOVQ %XMM0,(%R12,%RAX,8) |
(205) 0xea03 INC %RAX |
(205) 0xea06 CMP %RAX,%RSI |
(205) 0xea09 JNE e9f0 |
(190) 0xea0b JMP eb2b |
(190) 0xea10 MOV %RCX,%R13 |
(190) 0xea13 MOV $0xd4e,%ESI |
(190) 0xea18 MOV $0x1,%EDX |
(190) 0xea1d LEA 0x447b(%RIP),%RDI |
(190) 0xea24 XOR %ECX,%ECX |
(190) 0xea26 VZEROUPPER |
(190) 0xea29 CALL 2eb0 <hypre_error_handler@plt> |
(190) 0xea2e MOV 0xd0(%RBP),%RAX |
(190) 0xea35 LOCK INCQ (%RAX) |
(190) 0xea39 CMPQ $0,0xc0(%RBP) |
(190) 0xea41 JE ea91 |
(190) 0xea43 LEA 0x45b1(%RIP),%RDI |
(190) 0xea4a JMP ea86 |
(190) 0xea4c MOV %RCX,%R13 |
(190) 0xea4f MOV $0xd70,%ESI |
(190) 0xea54 MOV $0x1,%EDX |
(190) 0xea59 LEA 0x443f(%RIP),%RDI |
(190) 0xea60 XOR %ECX,%ECX |
(190) 0xea62 VZEROUPPER |
(190) 0xea65 CALL 2eb0 <hypre_error_handler@plt> |
(190) 0xea6a MOV 0xd0(%RBP),%RAX |
(190) 0xea71 LOCK INCQ (%RAX) |
(190) 0xea75 CMPQ $0,0xc0(%RBP) |
(190) 0xea7d JE ea91 |
(190) 0xea7f LEA 0x459b(%RIP),%RDI |
(190) 0xea86 MOV -0x70(%RBP),%RSI |
(190) 0xea8a XOR %EAX,%EAX |
(190) 0xea8c CALL 2ea0 <hypre_printf@plt> |
(190) 0xea91 MOV -0x38(%RBP),%RDX |
(190) 0xea95 MOV -0x48(%RBP),%RSI |
(190) 0xea99 MOV -0x30(%RBP),%RDI |
(190) 0xea9d VPCMPEQD %YMM2,%YMM2,%YMM2 |
(190) 0xeaa1 MOV 0x20(%RBP),%R8 |
(190) 0xeaa5 MOV -0x50(%RBP),%R10 |
(190) 0xeaa9 MOV 0x18(%RBP),%RAX |
(190) 0xeaad MOV (%RAX),%RAX |
(190) 0xeab0 MOV 0x38(%RAX),%RCX |
(190) 0xeab4 MOV -0x80(%RBP),%R9 |
(190) 0xeab8 MOV %R14,(%RCX,%R9,8) |
(190) 0xeabc MOV 0x40(%RAX),%RAX |
(190) 0xeac0 MOV %R10,(%RAX,%R9,8) |
(190) 0xeac4 MOV %R13,%RCX |
(190) 0xeac7 JMP e034 |
(190) 0xeacc MOV %R14,%RAX |
(190) 0xeacf AND $-0x4,%RAX |
(190) 0xead3 JE eb06 |
(190) 0xead5 LEA -0x1(%RAX),%RCX |
(190) 0xead9 XOR %EDX,%EDX |
(190) 0xeadb MOV -0x60(%RBP),%RSI |
(190) 0xeadf NOP |
(207) 0xeae0 VMOVUPS (%R11,%RDX,8),%YMM0 |
(207) 0xeae6 VMOVUPS %YMM0,(%RDI,%RDX,8) |
(207) 0xeaeb VMOVDQU (%RSI,%RDX,8),%YMM0 |
(207) 0xeaf0 VMOVDQU %YMM0,(%R12,%RDX,8) |
(207) 0xeaf6 ADD $0x4,%RDX |
(207) 0xeafa CMP %RCX,%RDX |
(207) 0xeafd JLE eae0 |
(190) 0xeaff CMP %RAX,%R14 |
(190) 0xeb02 JNE eb08 |
(190) 0xeb04 JMP eb2b |
(190) 0xeb06 XOR %EAX,%EAX |
(190) 0xeb08 MOV -0x60(%RBP),%RDX |
(190) 0xeb0c NOPL (%RAX) |
(206) 0xeb10 MOV (%R11,%RAX,8),%RCX |
(206) 0xeb14 MOV %RCX,(%RDI,%RAX,8) |
(206) 0xeb18 VMOVQ (%RDX,%RAX,8),%XMM0 |
(206) 0xeb1d VMOVQ %XMM0,(%R12,%RAX,8) |
(206) 0xeb23 INC %RAX |
(206) 0xeb26 CMP %RAX,%R14 |
(206) 0xeb29 JNE eb10 |
(190) 0xeb2b MOV %R11,%RDI |
(190) 0xeb2e VZEROUPPER |
(190) 0xeb31 CALL 2c50 <hypre_Free@plt> |
(190) 0xeb36 MOV -0x60(%RBP),%RDI |
(190) 0xeb3a CALL 2c50 <hypre_Free@plt> |
(190) 0xeb3f MOVQ $0,-0x60(%RBP) |
(190) 0xeb47 MOV -0x38(%RBP),%RDX |
(190) 0xeb4b MOV -0x48(%RBP),%RSI |
(190) 0xeb4f MOV -0x30(%RBP),%RDI |
(190) 0xeb53 MOV -0x78(%RBP),%RCX |
(190) 0xeb57 VPCMPEQD %YMM2,%YMM2,%YMM2 |
(190) 0xeb5b JMP e030 |
0xeb60 ADD $0x88,%RSP |
0xeb67 POP %RBX |
0xeb68 POP %R12 |
0xeb6a POP %R13 |
0xeb6c POP %R14 |
0xeb6e POP %R15 |
0xeb70 POP %RBP |
0xeb71 VZEROUPPER |
0xeb74 RET |
0xeb75 NOPW %CS:(%RAX,%RAX,1) |
0xeb7f NOP |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►100.00+ | __kmp_invoke_microtask | libiomp5.so | |
○ | __kmp_invoke_task_func | libiomp5.so |
Path / |
Source file and lines | IJMatrix_parcsr.c:3240-3484 |
Module | libIJ_mv.so |
nb instructions | 159 |
nb uops | 172 |
loop length | 636 |
used x86 registers | 15 |
used mmx registers | 0 |
used xmm registers | 2 |
used ymm registers | 2 |
used zmm registers | 0 |
nb stack references | 10 |
micro-operation queue | 28.67 cycles |
front end | 28.67 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 11.70 | 12.00 | 8.67 | 8.67 | 8.50 | 11.80 | 11.70 | 8.50 | 8.50 | 8.50 | 11.80 | 8.67 |
cycles | 11.70 | 16.40 | 8.67 | 8.67 | 8.50 | 11.80 | 11.70 | 8.50 | 8.50 | 8.50 | 11.80 | 8.67 |
Cycles executing div or sqrt instructions | 16.00 |
FE+BE cycles | 27.36-27.39 |
Stall cycles | 0.00 |
Front-end | 28.67 |
Dispatch | 16.40 |
DIV/SQRT | 16.00 |
Overall L1 | 28.67 |
all | 24% |
load | NA (no load vectorizable/vectorized instructions) |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 100% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 27% |
all | 15% |
load | NA (no load vectorizable/vectorized instructions) |
store | 10% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 25% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 9% |
other | 16% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
SUB $0x88,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R9,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R8,-0xa0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RCX,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDX,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDI,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 2d90 <hypre_NumActiveThreads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %RAX,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 2b50 <hypre_GetThreadNum@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %RAX,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R13,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
OR %R15,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SHR $0x20,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
JE ddce <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x4e> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R15,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CQTO | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
IDIV %R13 | 5 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 10 |
JMP ddd6 <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x56> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
MOV %R15D,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
DIV %R13D | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
MOV 0xb8(%RBP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x1(%R12),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %RDX,%R12 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x38(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JGE ddfc <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x7c> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA 0x1(%RAX),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RDI,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
IMUL %R12,%RSI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
IMUL %RCX,%RDI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
JMP de10 <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x90> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
MOV %RAX,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
IMUL %R12,%RSI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %RDX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
IMUL %RAX,%RCX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %RDX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RCX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOVQ $0,(%R15,%R12,8) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RSI,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %RDI,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RDI,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JGE deff <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x17f> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA (%R15,%R12,8),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x8(%R8,%RDI,8),%RDI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
CMP %RDX,%R12 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMOVL %R12,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
IMUL %R12,%RAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%RDX,%RAX,1),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %RCX,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB de74 <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0xf4> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA (%R8,%RSI,8),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %RDI,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB de74 <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0xf4> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R13,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV -0x30(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP deff <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x17f> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
MOV -0x30(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %RSI,%R8 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R8,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x4,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
JE ded6 <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x156> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA -0x1(%RSI),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x38(%RBP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%R9,%R13,8),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPXOR %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %R10D,%R10D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VEXTRACTI128 $0x1,%YMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPADDQ %XMM1,%XMM0,%XMM0 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VPSHUFD $-0x12,%XMM0,%XMM1 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPADDQ %XMM1,%XMM0,%XMM0 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VMOVQ %XMM0,%RDI | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
CMP %RSI,%R8 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x38(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x30(%RBP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JNE dee2 <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x162> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
JMP defc <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x17c> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
XOR %ESI,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %EDI,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x38(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x30(%RBP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD %RDX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %RAX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RDI,(%RCX) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%R14),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x208387(%RIP),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 2e20 <__kmpc_barrier@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
TEST %R12,%R12 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JNE dfb4 <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x234> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0xc8(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
TEST %RAX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE dfb4 <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x234> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV (%R15),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP $0x8,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB df87 <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x207> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RAX,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SHR $0x3,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
LEA 0x40(%R15),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RAX,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x8,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
CMP %RAX,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE dfb4 <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x234> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV (%R14),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x2082f2(%RIP),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CALL 2e20 <__kmpc_barrier@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
TEST %R12,%R12 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JE dfcf <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x24f> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x8(%R15,%R12,8),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JMP dfd1 <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x251> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x38(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R13,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV -0x30(%RBP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %RDI,%R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE eb60 <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0xde0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0xb0(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x98(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RAX,%R12,8),%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x1(%R8),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SHR $0x1,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV %RAX,-0xa8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VPCMPEQD %YMM2,%YMM2,%YMM2 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
MOV 0x20(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVQ $0,-0x60(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP e040 <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x2c0> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD $0x88,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | IJMatrix_parcsr.c:3240-3484 |
Module | libIJ_mv.so |
nb instructions | 159 |
nb uops | 172 |
loop length | 636 |
used x86 registers | 15 |
used mmx registers | 0 |
used xmm registers | 2 |
used ymm registers | 2 |
used zmm registers | 0 |
nb stack references | 10 |
micro-operation queue | 28.67 cycles |
front end | 28.67 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 11.70 | 12.00 | 8.67 | 8.67 | 8.50 | 11.80 | 11.70 | 8.50 | 8.50 | 8.50 | 11.80 | 8.67 |
cycles | 11.70 | 16.40 | 8.67 | 8.67 | 8.50 | 11.80 | 11.70 | 8.50 | 8.50 | 8.50 | 11.80 | 8.67 |
Cycles executing div or sqrt instructions | 16.00 |
FE+BE cycles | 27.36-27.39 |
Stall cycles | 0.00 |
Front-end | 28.67 |
Dispatch | 16.40 |
DIV/SQRT | 16.00 |
Overall L1 | 28.67 |
all | 24% |
load | NA (no load vectorizable/vectorized instructions) |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 100% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 27% |
all | 15% |
load | NA (no load vectorizable/vectorized instructions) |
store | 10% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 25% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 9% |
other | 16% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
SUB $0x88,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R9,%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R8,-0xa0(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RCX,-0x38(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RDX,%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %RDI,%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 2d90 <hypre_NumActiveThreads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %RAX,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 2b50 <hypre_GetThreadNum@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %RAX,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV %R13,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
OR %R15,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
SHR $0x20,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
JE ddce <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x4e> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %R15,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CQTO | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
IDIV %R13 | 5 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 10 |
JMP ddd6 <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x56> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
MOV %R15D,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
DIV %R13D | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
MOV 0xb8(%RBP),%R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x1(%R12),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %RDX,%R12 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x38(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JGE ddfc <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x7c> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA 0x1(%RAX),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RDI,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
IMUL %R12,%RSI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
IMUL %RCX,%RDI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
JMP de10 <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x90> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
MOV %RAX,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
IMUL %R12,%RSI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %RDX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
IMUL %RAX,%RCX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %RDX,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RCX,%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOVQ $0,(%R15,%R12,8) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV %RSI,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CMP %RDI,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %RDI,-0x30(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JGE deff <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x17f> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA (%R15,%R12,8),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x8(%R8,%RDI,8),%RDI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
CMP %RDX,%R12 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMOVL %R12,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
IMUL %R12,%RAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
LEA (%RDX,%RAX,1),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %RCX,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB de74 <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0xf4> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA (%R8,%RSI,8),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CMP %RDI,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB de74 <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0xf4> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %R13,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV -0x30(%RBP),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP deff <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x17f> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
MOV -0x30(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SUB %RSI,%R8 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV %R8,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x4,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
JE ded6 <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x156> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
LEA -0x1(%RSI),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x38(%RBP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%R9,%R13,8),%R9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VPXOR %XMM0,%XMM0,%XMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %R10D,%R10D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VEXTRACTI128 $0x1,%YMM0,%XMM1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
VPADDQ %XMM1,%XMM0,%XMM0 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VPSHUFD $-0x12,%XMM0,%XMM1 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
VPADDQ %XMM1,%XMM0,%XMM0 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
VMOVQ %XMM0,%RDI | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
CMP %RSI,%R8 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
MOV -0x38(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x30(%RBP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JNE dee2 <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x162> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
JMP defc <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x17c> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
XOR %ESI,%ESI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XOR %EDI,%EDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x38(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV -0x30(%RBP),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
ADD %RDX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %RAX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RDI,(%RCX) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV (%R14),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x208387(%RIP),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
CALL 2e20 <__kmpc_barrier@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
TEST %R12,%R12 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JNE dfb4 <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x234> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0xc8(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
TEST %RAX,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE dfb4 <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x234> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV (%R15),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP $0x8,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JB df87 <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x207> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV %RAX,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
SHR $0x3,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
LEA 0x40(%R15),%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV %RAX,%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $-0x8,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
CMP %RAX,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE dfb4 <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x234> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV (%R14),%ESI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA 0x2082f2(%RIP),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
CALL 2e20 <__kmpc_barrier@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
TEST %R12,%R12 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JE dfcf <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x24f> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV -0x8(%R15,%R12,8),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
JMP dfd1 <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x251> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV -0x38(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R13,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV -0x30(%RBP),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CMP %RDI,%R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE eb60 <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0xde0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0xb0(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x98(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RAX,%R12,8),%R15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x1(%R8),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
SHR $0x1,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV %RAX,-0xa8(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VPCMPEQD %YMM2,%YMM2,%YMM2 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
MOV 0x20(%RBP),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVQ $0,-0x60(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
JMP e040 <hypre_IJMatrixSetValuesOMPParCSR.extracted.28+0x2c0> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
XCHG %AX,%AX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD $0x88,%RSP | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
VZEROUPPER | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼hypre_IJMatrixSetValuesOMPParCSR.extracted.28– | 0.39 | 0.08 |
○Loop 213 - IJMatrix_parcsr.c:3282-3283 - libIJ_mv.so | 0.21 | 0.04 |
▼Loop 190 - IJMatrix_parcsr.c:3262-3484 - libIJ_mv.so– | 0.04 | 0.01 |
▼Loop 198 - IJMatrix_parcsr.c:3262-3454 - libIJ_mv.so– | 0.34 | 0.06 |
○Loop 203 - IJMatrix_parcsr.c:3422-3424 - libIJ_mv.so | 0 | 0 |
○Loop 204 - IJMatrix_parcsr.c:3422-3424 - libIJ_mv.so | 0 | 0 |
○Loop 199 - IJMatrix_parcsr.c:3388-3390 - libIJ_mv.so | 0 | 0 |
○Loop 202 - IJMatrix_parcsr.c:3422-3424 - libIJ_mv.so | 0 | 0 |
○Loop 200 - IJMatrix_parcsr.c:3388-3424 - libIJ_mv.so | 0 | 0 |
○Loop 201 - IJMatrix_parcsr.c:3388-3390 - libIJ_mv.so | 0 | 0 |
○Loop 207 - IJMatrix_parcsr.c:3359-3362 - libIJ_mv.so | 0.18 | 0.03 |
▼Loop 208 - IJMatrix_parcsr.c:3262-3337 - libIJ_mv.so– | 0 | 0 |
○Loop 209 - IJMatrix_parcsr.c:3318-3320 - libIJ_mv.so | 0 | 0 |
○Loop 211 - IJMatrix_parcsr.c:3318-3320 - libIJ_mv.so | 0 | 0 |
○Loop 210 - IJMatrix_parcsr.c:3318-3320 - libIJ_mv.so | 0 | 0 |
○Loop 205 - IJMatrix_parcsr.c:3359-3362 - libIJ_mv.so | 0 | 0 |
○Loop 206 - IJMatrix_parcsr.c:3359-3362 - libIJ_mv.so | 0 | 0 |
▼Loop 191 - IJMatrix_parcsr.c:3262-3484 - libIJ_mv.so– | 0 | 0 |
▼Loop 192 - IJMatrix_parcsr.c:3475-3484 - libIJ_mv.so– | 0 | 0 |
○Loop 193 - IJMatrix_parcsr.c:3478-3484 - libIJ_mv.so | 0 | 0 |
▼Loop 194 - IJMatrix_parcsr.c:3262-3484 - libIJ_mv.so– | 0 | 0 |
○Loop 197 - IJMatrix_parcsr.c:3262-3482 - libIJ_mv.so | 0 | 0 |
▼Loop 196 - IJMatrix_parcsr.c:3262-3482 - libIJ_mv.so– | 0 | 0 |
○Loop 195 - IJMatrix_parcsr.c:3478-3480 - libIJ_mv.so | 0 | 0 |
○Loop 215 - IJMatrix_parcsr.c:3274-3275 - libIJ_mv.so | 0.01 | 0 |
○Loop 212 - IJMatrix_parcsr.c:3282-3283 - libIJ_mv.so | 0 | 0 |
○Loop 216 - IJMatrix_parcsr.c:3274-3275 - libIJ_mv.so | 0 | 0 |
○Loop 214 - IJMatrix_parcsr.c:3274-3275 - libIJ_mv.so | 0 | 0 |