Function: hypre_BoomerAMGCorrectCFMarker | Module: exec | Source: par_strength.c:2307-2320 | Coverage: 0.03% |
---|
Function: hypre_BoomerAMGCorrectCFMarker | Module: exec | Source: par_strength.c:2307-2320 | Coverage: 0.03% |
---|
/home/kcamus/qaas_runs/169-443-9681/intel/AMG/build/AMG/AMG/parcsr_ls/par_strength.c: 2307 - 2320 |
-------------------------------------------------------------------------------- |
2307: { |
2308: HYPRE_Int i, cnt; |
2309: |
2310: cnt = 0; |
2311: for (i=0; i < num_var; i++) |
2312: { |
2313: if (CF_marker[i] > 0 ) |
2314: { |
2315: if (CF_marker[i] == 1) CF_marker[i] = new_CF_marker[cnt++]; |
2316: else { CF_marker[i] = 1; cnt++;} |
2317: } |
2318: } |
2319: |
2320: return 0; |
0x489170 PUSH %RBP |
0x489171 MOV %RSP,%RBP |
0x489174 TEST %RSI,%RSI |
0x489177 JLE 48918e |
0x489179 CMP $0x4,%RSI |
0x48917d JAE 4891ca |
0x48917f XOR %R8D,%R8D |
0x489182 MOV %RSI,%RAX |
0x489185 AND $-0x4,%RAX |
0x489189 CMP %RSI,%RAX |
0x48918c JB 4891af |
0x48918e XOR %EAX,%EAX |
0x489190 POP %RBP |
0x489191 RET |
0x489192 NOPW %CS:(%RAX,%RAX,1) |
(2271) 0x4891a0 INC %R8 |
(2271) 0x4891a3 MOV %R9,(%RDI,%RAX,8) |
(2271) 0x4891a7 INC %RAX |
(2271) 0x4891aa CMP %RAX,%RSI |
(2271) 0x4891ad JE 48918e |
(2271) 0x4891af MOV (%RDI,%RAX,8),%RCX |
(2271) 0x4891b3 TEST %RCX,%RCX |
(2271) 0x4891b6 JLE 4891a7 |
(2271) 0x4891b8 MOV $0x1,%R9D |
(2271) 0x4891be CMP $0x1,%RCX |
(2271) 0x4891c2 JNE 4891a0 |
(2271) 0x4891c4 MOV (%RDX,%R8,8),%R9 |
(2271) 0x4891c8 JMP 4891a0 |
0x4891ca MOV %RSI,%R10 |
0x4891cd SHR $0x2,%R10 |
0x4891d1 LEA 0x18(%RDI),%RAX |
0x4891d5 XOR %R8D,%R8D |
0x4891d8 JMP 4891ef |
0x4891da NOPW (%RAX,%RAX,1) |
(2272) 0x4891e0 INC %R8 |
(2272) 0x4891e3 MOV %R9,(%RAX) |
(2272) 0x4891e6 ADD $0x20,%RAX |
(2272) 0x4891ea DEC %R10 |
(2272) 0x4891ed JE 489182 |
(2272) 0x4891ef MOV -0x18(%RAX),%RCX |
(2272) 0x4891f3 TEST %RCX,%RCX |
(2272) 0x4891f6 JLE 48920f |
(2272) 0x4891f8 MOV $0x1,%R9D |
(2272) 0x4891fe CMP $0x1,%RCX |
(2272) 0x489202 JNE 489208 |
(2272) 0x489204 MOV (%RDX,%R8,8),%R9 |
(2272) 0x489208 INC %R8 |
(2272) 0x48920b MOV %R9,-0x18(%RAX) |
(2272) 0x48920f MOV -0x10(%RAX),%RCX |
(2272) 0x489213 TEST %RCX,%RCX |
(2272) 0x489216 JLE 48922f |
(2272) 0x489218 MOV $0x1,%R9D |
(2272) 0x48921e CMP $0x1,%RCX |
(2272) 0x489222 JNE 489228 |
(2272) 0x489224 MOV (%RDX,%R8,8),%R9 |
(2272) 0x489228 INC %R8 |
(2272) 0x48922b MOV %R9,-0x10(%RAX) |
(2272) 0x48922f MOV -0x8(%RAX),%RCX |
(2272) 0x489233 TEST %RCX,%RCX |
(2272) 0x489236 JLE 48924f |
(2272) 0x489238 MOV $0x1,%R9D |
(2272) 0x48923e CMP $0x1,%RCX |
(2272) 0x489242 JNE 489248 |
(2272) 0x489244 MOV (%RDX,%R8,8),%R9 |
(2272) 0x489248 INC %R8 |
(2272) 0x48924b MOV %R9,-0x8(%RAX) |
(2272) 0x48924f MOV (%RAX),%RCX |
(2272) 0x489252 TEST %RCX,%RCX |
(2272) 0x489255 JLE 4891e6 |
(2272) 0x489257 MOV $0x1,%R9D |
(2272) 0x48925d CMP $0x1,%RCX |
(2272) 0x489261 JNE 4891e0 |
(2272) 0x489267 MOV (%RDX,%R8,8),%R9 |
(2272) 0x48926b JMP 4891e0 |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►100.00+ | hypre_BoomerAMGSetup | par_amg_setup.c:730 | exec |
○ | hypre_PCGSetup | pcg.c:234 | exec |
○ | main | amg.c:398 | exec |
○ | __libc_init_first | libc.so.6 |
Path / |
Source file and lines | par_strength.c:2307-2320 |
Module | exec |
nb instructions | 21 |
nb uops | 21 |
loop length | 70 |
used x86 registers | 7 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 0 |
micro-operation queue | 5.25 cycles |
front end | 5.25 cycles |
| | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 |
---|---|---|---|---|---|---|---|---|
uops | 3.00 | 2.50 | 1.17 | 0.83 | 1.00 | 2.50 | 3.00 | 1.00 |
cycles | 3.00 | 2.50 | 1.17 | 0.83 | 1.00 | 2.50 | 3.00 | 1.00 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 5.33 |
Stall cycles | 0.00 |
Front-end | 5.25 |
Dispatch | 3.00 |
Overall L1 | 5.25 |
all | 0% |
load | NA (no load vectorizable/vectorized instructions) |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 10% |
load | NA (no load vectorizable/vectorized instructions) |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 10% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
TEST %RSI,%RSI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JLE 48918e | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x4,%RSI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JAE 4891ca | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
XOR %R8D,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %RSI,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
AND $-0x4,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CMP %RSI,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JB 4891af | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
POP %RBP | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
RET | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | 0 | 1 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %RSI,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
SHR $0x2,%R10 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
LEA 0x18(%RDI),%RAX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
XOR %R8D,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 4891ef | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
Source file and lines | par_strength.c:2307-2320 |
Module | exec |
nb instructions | 21 |
nb uops | 21 |
loop length | 70 |
used x86 registers | 7 |
used mmx registers | 0 |
used xmm registers | 0 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 0 |
micro-operation queue | 5.25 cycles |
front end | 5.25 cycles |
| | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 |
---|---|---|---|---|---|---|---|---|
uops | 3.00 | 2.50 | 1.17 | 0.83 | 1.00 | 2.50 | 3.00 | 1.00 |
cycles | 3.00 | 2.50 | 1.17 | 0.83 | 1.00 | 2.50 | 3.00 | 1.00 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 5.33 |
Stall cycles | 0.00 |
Front-end | 5.25 |
Dispatch | 3.00 |
Overall L1 | 5.25 |
all | 0% |
load | NA (no load vectorizable/vectorized instructions) |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 10% |
load | NA (no load vectorizable/vectorized instructions) |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 10% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
TEST %RSI,%RSI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JLE 48918e | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
CMP $0x4,%RSI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JAE 4891ca | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
XOR %R8D,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %RSI,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
AND $-0x4,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
CMP %RSI,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
JB 4891af | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
POP %RBP | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
RET | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | 0 | 1 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
MOV %RSI,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
SHR $0x2,%R10 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
LEA 0x18(%RDI),%RAX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
XOR %R8D,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
JMP 4891ef | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼hypre_BoomerAMGCorrectCFMarker– | 0.03 | 0.01 |
○Loop 2272 - par_strength.c:2311-2315 - exec | 0.03 | 0.01 |
○Loop 2271 - par_strength.c:2311-2315 - exec | 0 | 0 |