| | | | | | | requested parallelism | walltime sum (s) | nb instances | any sync average per thread time (s) | any wait average per thread time (s) | parallelism overhead (%) | local speedup if perfectly balanced | global speedup if perfectly balanced |
start addr | function name | source location | level | ancestor thread num | invoker | parallel or teams | run_0 | run_1 | run_2 | run_3 | run_4 | run_5 | run_6 | run_0 | run_1 | run_2 | run_3 | run_4 | run_5 | run_6 | run_0 | run_1 | run_2 | run_3 | run_4 | run_5 | run_6 | run_0 | run_1 | run_2 | run_3 | run_4 | run_5 | run_6 | run_0 | run_1 | run_2 | run_3 | run_4 | run_5 | run_6 | run_0 | run_1 | run_2 | run_3 | run_4 | run_5 | run_6 | run_0 | run_1 | run_2 | run_3 | run_4 | run_5 | run_6 | run_0 | run_1 | run_2 | run_3 | run_4 | run_5 | run_6 |
libqmckl.so.0.0.0:0x31bdd | qmckl_compute_dtmp_c_hpc | qmckl_jastrow_champ.c:3977 | 0 | 0 | runtime | parallel | 1 | 2 | 4 | 8 | 16 | 26 | 52 | 83.607 | 41.996 | 23.125 | 11.646 | 6.157 | 4.093 | 3.948 | 10.0 | 10.0 | 10.0 | 10.0 | 10.0 | 10.0 | 10.0 | 0.0 | 7.17 E-3 | 1.547 | 0.652 | 0.449 | 0.256 | 0.540 | 0.0 | 7.17 E-3 | 1.547 | 0.652 | 0.449 | 0.256 | 0.540 | 0 | 0.02 | 6.69 | 5.60 | 7.29 | 6.26 | 13.7 | 1.000 | 1.000 | 1.072 | 1.059 | 1.079 | 1.067 | 1.158 | 1.000 | 1.000 | 1.014 | 1.007 | 1.006 | 1.003 | 1.007 |
libqmckl.so.0.0.0:0x313fa | qmckl_compute_tmp_c_hpc | qmckl_jastrow_champ.c:3907 | 0 | 0 | runtime | parallel | 1 | 2 | 4 | 8 | 16 | 26 | 52 | 43.479 | 21.842 | 11.883 | 6.251 | 3.258 | 2.253 | 1.970 | 20.0 | 20.0 | 20.0 | 20.0 | 20.0 | 20.0 | 20.0 | 0.0 | 29.4 E-3 | 0.671 | 0.441 | 0.282 | 0.154 | 0.202 | 0.0 | 29.4 E-3 | 0.671 | 0.441 | 0.282 | 0.154 | 0.202 | 0 | 0.13 | 5.64 | 7.06 | 8.65 | 6.83 | 10.2 | 1.000 | 1.001 | 1.060 | 1.076 | 1.095 | 1.073 | 1.114 | 1.000 | 1.000 | 1.006 | 1.005 | 1.004 | 1.002 | 1.003 |
libqmckl.so.0.0.0:0x2ef55 | qmckl_compute_een_rescaled_e_hpc | qmckl_jastrow_champ.c:2957 | 0 | 0 | runtime | parallel | 1 | 2 | 4 | 8 | 16 | 26 | 52 | 10.064 | 5.141 | 3.547 | 1.903 | 1.144 | 0.850 | 0.841 | 20.0 | 20.0 | 20.0 | 20.0 | 20.0 | 20.0 | 20.0 | 0.0 | 9.73 E-3 | 0.694 | 0.322 | 0.235 | 91.1 E-3 | 79.8 E-3 | 0.0 | 9.72 E-3 | 0.694 | 0.322 | 0.235 | 91.1 E-3 | 79.8 E-3 | 0 | 0.19 | 19.6 | 16.9 | 20.5 | 10.7 | 9.50 | 1.000 | 1.002 | 1.243 | 1.204 | 1.259 | 1.120 | 1.105 | 1.000 | 1.000 | 1.006 | 1.004 | 1.003 | 1.001 | 1.001 |
libqmckl.so.0.0.0:0x32c3d | qmckl_compute_jastrow_champ_factor_een_gl_hpc | qmckl_jastrow_champ.c:4352 | 0 | 0 | runtime | parallel | 1 | 2 | 4 | 8 | 16 | 26 | 52 | 9.984 | 5.320 | 3.112 | 1.981 | 1.658 | 1.801 | 2.084 | 10.0 | 10.0 | 10.0 | 10.0 | 10.0 | 10.0 | 10.0 | 0.0 | 416 E-6 | 54.5 E-3 | 67.0 E-3 | 93.1 E-3 | 0.151 | 0.235 | 0.0 | 410 E-6 | 54.5 E-3 | 67.0 E-3 | 93.1 E-3 | 0.151 | 0.235 | 0 | 0.01 | 1.75 | 3.38 | 5.62 | 8.40 | 11.3 | 1.000 | 1.000 | 1.018 | 1.035 | 1.060 | 1.092 | 1.127 | 1.000 | 1.000 | 1.000 | 1.001 | 1.001 | 1.002 | 1.003 |
libqmckl.so.0.0.0:0x2b1a9 | qmckl_compute_jastrow_champ_factor_ee_gl_hpc | qmckl_jastrow_champ.c:1646 | 0 | 0 | runtime | parallel | 1 | 2 | 4 | 8 | 16 | 26 | 52 | 6.420 | 3.214 | 1.624 | 0.844 | 0.455 | 0.263 | 0.136 | 10.0 | 10.0 | 10.0 | 10.0 | 10.0 | 10.0 | 10.0 | 0.0 | 1.02 E-3 | 13.8 E-3 | 36.6 E-3 | 50.9 E-3 | 13.6 E-3 | 8.49 E-3 | 0.0 | 1.02 E-3 | 13.8 E-3 | 36.6 E-3 | 50.9 E-3 | 13.6 E-3 | 8.49 E-3 | 0 | 0.03 | 0.85 | 4.34 | 11.2 | 5.18 | 6.24 | 1.000 | 1.000 | 1.009 | 1.045 | 1.126 | 1.055 | 1.066 | 1.000 | 1.000 | 1.000 | 1.000 | 1.001 | 1.000 | 1.000 |
libqmckl.so.0.0.0:0x2ac31 | qmckl_compute_ee_distance_rescaled_gl_hpc | qmckl_jastrow_champ.c:1973 | 0 | 0 | runtime | parallel | 1 | 2 | 4 | 8 | 16 | 26 | 52 | 1.868 | 0.939 | 0.491 | 0.267 | 0.191 | 0.191 | 0.192 | 10.0 | 10.0 | 10.0 | 10.0 | 10.0 | 10.0 | 10.0 | 19.3 E-6 | 1.79 E-3 | 13.8 E-3 | 19.3 E-3 | 23.6 E-3 | 13.2 E-3 | 18.4 E-3 | 11.3 E-6 | 1.78 E-3 | 13.8 E-3 | 19.3 E-3 | 23.5 E-3 | 13.1 E-3 | 18.4 E-3 | 0.00 | 0.19 | 2.82 | 7.23 | 12.3 | 6.87 | 9.61 | 1.000 | 1.002 | 1.029 | 1.078 | 1.141 | 1.074 | 1.106 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 |
libqmckl.so.0.0.0:0x2def9 | qmckl_compute_jastrow_champ_factor_en_gl_hpc | qmckl_jastrow_champ.c:2432 | 0 | 0 | runtime | parallel | 1 | 2 | 4 | 8 | 16 | 26 | 52 | 1.458 | 0.730 | 0.366 | 0.191 | 0.103 | 61.0 E-3 | 32.1 E-3 | 10.0 | 10.0 | 10.0 | 10.0 | 10.0 | 10.0 | 10.0 | 0.0 | 396 E-6 | 1.26 E-3 | 7.81 E-3 | 11.9 E-3 | 4.15 E-3 | 3.08 E-3 | 0.0 | 392 E-6 | 1.25 E-3 | 7.81 E-3 | 11.9 E-3 | 4.15 E-3 | 3.08 E-3 | 0 | 0.05 | 0.34 | 4.10 | 11.5 | 6.81 | 9.59 | 1.000 | 1.001 | 1.003 | 1.043 | 1.130 | 1.073 | 1.106 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 |
libqmckl.so.0.0.0:0x2cb68 | qmckl_compute_jastrow_champ_factor_en_hpc | qmckl_jastrow_champ.c:2265 | 0 | 0 | runtime | parallel | 1 | 2 | 4 | 8 | 16 | 26 | 52 | 0.879 | 0.443 | 0.226 | 0.121 | 66.8 E-3 | 43.7 E-3 | 30.2 E-3 | 20.0 | 20.0 | 20.0 | 20.0 | 20.0 | 20.0 | 20.0 | 0.0 | 2.02 E-3 | 4.57 E-3 | 8.34 E-3 | 9.45 E-3 | 6.07 E-3 | 8.14 E-3 | 0.0 | 2.01 E-3 | 4.56 E-3 | 8.34 E-3 | 9.44 E-3 | 6.06 E-3 | 8.13 E-3 | 0 | 0.46 | 2.02 | 6.90 | 14.1 | 13.9 | 26.9 | 1.000 | 1.005 | 1.021 | 1.074 | 1.165 | 1.161 | 1.368 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 |
libqmckl.so.0.0.0:0x29de9 | qmckl_compute_ee_distance_rescaled_hpc | qmckl_jastrow_champ.c:1843 | 0 | 0 | runtime | parallel | 1 | 2 | 4 | 8 | 16 | 26 | 52 | 85.6 E-3 | 43.1 E-3 | 22.2 E-3 | 12.1 E-3 | 7.54 E-3 | 8.89 E-3 | 23.4 E-3 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 5.49 E-6 | 33.0 E-6 | 332 E-6 | 661 E-6 | 762 E-6 | 1.10 E-3 | 10.9 E-3 | 392 E-9 | 25.5 E-6 | 323 E-6 | 653 E-6 | 755 E-6 | 1.09 E-3 | 10.9 E-3 | 0.01 | 0.08 | 1.50 | 5.45 | 10.1 | 12.4 | 46.6 | 1.000 | 1.001 | 1.015 | 1.058 | 1.112 | 1.141 | 1.874 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 |
libqmckl.so.0.0.0:0x2d6ce | qmckl_compute_en_distance_rescaled_gl_hpc | qmckl_jastrow_champ.c:2783 | 0 | 0 | runtime | parallel | 1 | 2 | 4 | 8 | 16 | 26 | 52 | 44.4 E-3 | 22.5 E-3 | 11.5 E-3 | 6.35 E-3 | 4.13 E-3 | 4.50 E-3 | 3.33 E-3 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.58 E-6 | 248 E-6 | 206 E-6 | 502 E-6 | 590 E-6 | 616 E-6 | 325 E-6 | 396 E-9 | 246 E-6 | 205 E-6 | 501 E-6 | 590 E-6 | 615 E-6 | 324 E-6 | 0.00 | 1.10 | 1.79 | 7.91 | 14.3 | 13.7 | 9.77 | 1.000 | 1.011 | 1.018 | 1.086 | 1.167 | 1.159 | 1.108 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 |
libqmckl.so.0.0.0:0x2c445 | qmckl_compute_en_distance_rescaled_hpc | qmckl_jastrow_champ.c:2627 | 0 | 0 | runtime | parallel | 1 | 2 | 4 | 8 | 16 | 26 | 52 | 20.6 E-3 | 10.4 E-3 | 5.20 E-3 | 2.74 E-3 | 1.50 E-3 | 891 E-6 | 1.64 E-3 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.72 E-6 | 21.8 E-6 | 20.8 E-6 | 118 E-6 | 170 E-6 | 46.8 E-6 | 924 E-6 | 302 E-9 | 20.9 E-6 | 20.1 E-6 | 118 E-6 | 169 E-6 | 46.1 E-6 | 923 E-6 | 0.01 | 0.21 | 0.40 | 4.32 | 11.3 | 5.25 | 56.4 | 1.000 | 1.002 | 1.004 | 1.045 | 1.127 | 1.055 | 2.292 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 |