| | | | | | | requested parallelism | walltime sum (s) | nb instances | any sync average per thread time (s) | any wait average per thread time (s) | parallelism overhead (%) | local speedup if perfectly balanced | global speedup if perfectly balanced |
start addr | function name | source location | level | ancestor thread num | invoker | parallel or teams | 2x1 | 2x2 | 2x4 | 2x8 | 2x16 | 2x32 | 2x64 | 2x96 | 2x1 | 2x2 | 2x4 | 2x8 | 2x16 | 2x32 | 2x64 | 2x96 | 2x1 | 2x2 | 2x4 | 2x8 | 2x16 | 2x32 | 2x64 | 2x96 | 2x1 | 2x2 | 2x4 | 2x8 | 2x16 | 2x32 | 2x64 | 2x96 | 2x1 | 2x2 | 2x4 | 2x8 | 2x16 | 2x32 | 2x64 | 2x96 | 2x1 | 2x2 | 2x4 | 2x8 | 2x16 | 2x32 | 2x64 | 2x96 | 2x1 | 2x2 | 2x4 | 2x8 | 2x16 | 2x32 | 2x64 | 2x96 | 2x1 | 2x2 | 2x4 | 2x8 | 2x16 | 2x32 | 2x64 | 2x96 |
exec:0x410d13 | ljForce | ljForce.c:172 | 0 | 0 | runtime | parallel | 1 | 2 | 4 | 8 | 16 | 32 | 64 | 96 | 268.907 | 134.614 | 67.563 | 34.121 | 17.444 | 9.680 | 5.277 | 3.853 | 101 | 101 | 101 | 101 | 101 | 101 | 101 | 101 | 94.8 E-6 | 0.172 | 0.234 | 0.395 | 0.528 | 1.216 | 1.051 | 0.802 | 39.7 E-6 | 0.172 | 0.234 | 0.395 | 0.528 | 1.216 | 1.051 | 0.802 | 0.00 | 0.13 | 0.35 | 1.16 | 3.03 | 12.6 | 19.9 | 20.8 | 1.000 | 1.001 | 1.003 | 1.012 | 1.031 | 1.144 | 1.249 | 1.263 | 1.000 | 1.001 | 1.003 | 1.009 | 1.020 | 1.072 | 1.086 | 1.070 |
exec:0x417e8e | timestep.A | timestep.c:47 | 0 | 0 | runtime | parallel | 1 | 2 | 4 | 8 | 16 | 32 | 64 | 96 | 4.918 | 3.153 | 2.196 | 1.489 | 0.960 | 0.611 | 0.433 | 0.307 | 100 | 100 | 100 | 100 | 100 | 100 | 100 | 100 | 0.0 | 0.260 | 0.346 | 0.369 | 0.323 | 0.244 | 0.188 | 0.125 | 0.0 | 0.260 | 0.346 | 0.369 | 0.323 | 0.244 | 0.188 | 0.125 | 0 | 7.52 | 15.8 | 24.8 | 33.6 | 40.2 | 42.8 | 40.8 | 1.000 | 1.081 | 1.188 | 1.329 | 1.506 | 1.672 | 1.747 | 1.688 | 1.000 | 1.002 | 1.004 | 1.009 | 1.012 | 1.014 | 1.014 | 1.010 |
exec:0x417e21 | timestep.A | timestep.c:43 | 0 | 0 | runtime | parallel | 1 | 2 | 4 | 8 | 16 | 32 | 64 | 96 | 1.919 | 0.944 | 0.481 | 0.244 | 0.155 | 0.106 | 97.4 E-3 | 74.6 E-3 | 100 | 100 | 100 | 100 | 100 | 100 | 100 | 100 | 0.0 | 6.63 E-3 | 20.8 E-3 | 28.5 E-3 | 36.2 E-3 | 32.9 E-3 | 40.3 E-3 | 33.1 E-3 | 0.0 | 6.62 E-3 | 20.8 E-3 | 28.5 E-3 | 36.2 E-3 | 32.9 E-3 | 40.3 E-3 | 33.1 E-3 | 0 | 0.70 | 4.33 | 11.6 | 23.3 | 30.7 | 41.1 | 44.4 | 1.000 | 1.007 | 1.045 | 1.131 | 1.304 | 1.444 | 1.697 | 1.797 | 1.000 | 1.000 | 1.000 | 1.001 | 1.001 | 1.002 | 1.003 | 1.003 |
exec:0x417ef4 | timestep.A | timestep.c:71 | 0 | 0 | runtime | parallel | 1 | 2 | 4 | 8 | 16 | 32 | 64 | 96 | 1.826 | 0.911 | 0.470 | 0.234 | 0.142 | 0.133 | 0.107 | 0.105 | 100 | 100 | 100 | 100 | 100 | 100 | 100 | 100 | 0.0 | 3.20 E-3 | 12.8 E-3 | 9.04 E-3 | 19.4 E-3 | 30.3 E-3 | 39.9 E-3 | 41.9 E-3 | 0.0 | 3.18 E-3 | 12.8 E-3 | 9.03 E-3 | 19.4 E-3 | 30.3 E-3 | 39.9 E-3 | 41.9 E-3 | 0 | 0.35 | 2.73 | 3.86 | 13.4 | 23.6 | 36.2 | 39.8 | 1.000 | 1.004 | 1.028 | 1.040 | 1.154 | 1.308 | 1.567 | 1.662 | 1.000 | 1.000 | 1.000 | 1.000 | 1.001 | 1.002 | 1.003 | 1.003 |
exec:0x417ddd | timestep.A | timestep.c:39 | 0 | 0 | runtime | parallel | 1 | 2 | 4 | 8 | 16 | 32 | 64 | 96 | 1.809 | 0.877 | 0.384 | 0.164 | 0.107 | 77.2 E-3 | 57.1 E-3 | 48.3 E-3 | 100 | 100 | 100 | 100 | 100 | 100 | 100 | 100 | 0.0 | 15.5 E-3 | 26.6 E-3 | 31.8 E-3 | 40.1 E-3 | 28.8 E-3 | 26.3 E-3 | 26.0 E-3 | 0.0 | 15.5 E-3 | 26.6 E-3 | 31.8 E-3 | 40.1 E-3 | 28.8 E-3 | 26.3 E-3 | 26.0 E-3 | 0 | 1.77 | 6.93 | 19.3 | 37.4 | 37.6 | 45.6 | 54.1 | 1.000 | 1.018 | 1.074 | 1.240 | 1.596 | 1.601 | 1.839 | 2.178 | 1.000 | 1.000 | 1.000 | 1.001 | 1.001 | 1.002 | 1.002 | 1.002 |
exec:0x410c6d | ljForce | ljForce.c:157 | 0 | 0 | runtime | parallel | 1 | 2 | 4 | 8 | 16 | 32 | 64 | 96 | 0.861 | 0.637 | 0.598 | 0.629 | 0.395 | 0.290 | 0.196 | 0.170 | 101 | 101 | 101 | 101 | 101 | 101 | 101 | 101 | 0.0 | 65.0 E-3 | 0.133 | 0.271 | 0.180 | 0.151 | 87.5 E-3 | 69.7 E-3 | 0.0 | 65.0 E-3 | 0.133 | 0.271 | 0.180 | 0.151 | 87.4 E-3 | 69.7 E-3 | 0 | 10.2 | 22.2 | 42.1 | 45.5 | 51.6 | 44.6 | 41.1 | 1.000 | 1.113 | 1.285 | 1.728 | 1.834 | 2.067 | 1.804 | 1.699 | 1.000 | 1.000 | 1.002 | 1.006 | 1.007 | 1.008 | 1.007 | 1.006 |
exec:0x417ff2 | kineticEnergy | timestep.c:120 | 0 | 0 | runtime | parallel | 1 | 2 | 4 | 8 | 16 | 32 | 64 | 96 | 0.192 | 0.105 | 49.2 E-3 | 19.3 E-3 | 13.5 E-3 | 10.3 E-3 | 9.05 E-3 | 7.13 E-3 | 13.0 | 13.0 | 13.0 | 13.0 | 13.0 | 13.0 | 13.0 | 13.0 | 10.5 E-6 | 5.61 E-3 | 5.11 E-3 | 3.49 E-3 | 4.86 E-3 | 3.45 E-3 | 3.90 E-3 | 3.11 E-3 | 3.99 E-6 | 5.60 E-3 | 5.10 E-3 | 3.46 E-3 | 4.84 E-3 | 3.43 E-3 | 3.88 E-3 | 3.09 E-3 | 0.01 | 5.16 | 10.4 | 18.0 | 35.7 | 33.6 | 42.1 | 43.8 | 1.000 | 1.054 | 1.116 | 1.219 | 1.555 | 1.507 | 1.727 | 1.779 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 |
exec:0x40dbd3 | setTemperature.A | initAtoms.c:151 | 0 | 0 | runtime | parallel | 1 | 2 | 4 | 8 | 16 | 32 | 64 | 96 | 0.187 | 95.6 E-3 | 49.0 E-3 | 25.4 E-3 | 13.8 E-3 | 8.87 E-3 | 8.57 E-3 | 10.2 E-3 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 0.0 | 1.11 E-3 | 915 E-6 | 696 E-6 | 412 E-6 | 567 E-6 | 745 E-6 | 1.38 E-3 | 0.0 | 1.11 E-3 | 915 E-6 | 696 E-6 | 412 E-6 | 566 E-6 | 745 E-6 | 1.38 E-3 | 0 | 1.14 | 1.87 | 2.74 | 3.00 | 6.39 | 8.67 | 13.6 | 1.000 | 1.012 | 1.019 | 1.028 | 1.031 | 1.068 | 1.095 | 1.158 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 |
exec:0x40dcba | randomDisplacements | initAtoms.c:205 | 0 | 0 | runtime | parallel | 1 | 2 | 4 | 8 | 16 | 32 | 64 | 96 | 80.6 E-3 | 44.4 E-3 | 24.5 E-3 | 12.1 E-3 | 6.19 E-3 | 3.33 E-3 | 2.35 E-3 | 2.18 E-3 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 0.0 | 1.98 E-3 | 2.18 E-3 | 1.05 E-3 | 669 E-6 | 535 E-6 | 636 E-6 | 681 E-6 | 0.0 | 1.98 E-3 | 2.18 E-3 | 1.05 E-3 | 669 E-6 | 535 E-6 | 636 E-6 | 681 E-6 | 0 | 4.12 | 8.88 | 8.67 | 10.8 | 16.1 | 27.4 | 32.2 | 1.000 | 1.043 | 1.098 | 1.095 | 1.121 | 1.192 | 1.377 | 1.474 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 |
exec:0x406a67 | main.A | CoMD.c:207 | 0 | 0 | runtime | parallel | 1 | 2 | 4 | 8 | 16 | 32 | 64 | 96 | 48.1 E-3 | 32.4 E-3 | 23.1 E-3 | 20.4 E-3 | 12.7 E-3 | 8.64 E-3 | 7.45 E-3 | 7.75 E-3 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 0.0 | 3.38 E-3 | 3.78 E-3 | 7.39 E-3 | 5.10 E-3 | 3.65 E-3 | 3.18 E-3 | 3.75 E-3 | 0.0 | 3.38 E-3 | 3.78 E-3 | 7.39 E-3 | 5.10 E-3 | 3.65 E-3 | 3.18 E-3 | 3.75 E-3 | 0 | 9.16 | 16.5 | 36.5 | 40.3 | 42.4 | 43.9 | 49.4 | 1.000 | 1.101 | 1.198 | 1.574 | 1.676 | 1.736 | 1.781 | 1.978 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 |
exec:0x40db77 | setVcm.A | initAtoms.c:123 | 0 | 0 | runtime | parallel | 1 | 2 | 4 | 8 | 16 | 32 | 64 | 96 | 10.5 E-3 | 7.02 E-3 | 3.81 E-3 | 714 E-6 | 511 E-6 | 235 E-6 | 182 E-6 | 124 E-6 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 0.0 | 927 E-6 | 873 E-6 | 336 E-6 | 324 E-6 | 136 E-6 | 113 E-6 | 70.6 E-6 | 0.0 | 926 E-6 | 873 E-6 | 336 E-6 | 324 E-6 | 136 E-6 | 113 E-6 | 70.5 E-6 | 0 | 10.8 | 23.1 | 46.3 | 63.5 | 58.2 | 60.6 | 56.8 | 1.000 | 1.121 | 1.300 | 1.864 | 2.737 | 2.393 | 2.538 | 2.315 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 |
exec:0x40da9c | setVcm.A | initAtoms.c:218 | 0 | 0 | runtime | parallel | 1 | 2 | 4 | 8 | 16 | 32 | 64 | 96 | 8.72 E-3 | 6.04 E-3 | 3.37 E-3 | 837 E-6 | 537 E-6 | 324 E-6 | 220 E-6 | 200 E-6 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 2.49 E-6 | 766 E-6 | 731 E-6 | 283 E-6 | 310 E-6 | 185 E-6 | 123 E-6 | 99.6 E-6 | 435 E-9 | 765 E-6 | 730 E-6 | 282 E-6 | 309 E-6 | 184 E-6 | 122 E-6 | 98.4 E-6 | 0.03 | 10.3 | 21.9 | 33.6 | 57.6 | 57.2 | 56.2 | 49.2 | 1.000 | 1.114 | 1.280 | 1.507 | 2.361 | 2.336 | 2.281 | 1.967 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 |
exec:0x40dc63 | setTemperature.A | initAtoms.c:184 | 0 | 0 | runtime | parallel | 1 | 2 | 4 | 8 | 16 | 32 | 64 | 96 | 6.74 E-3 | 4.36 E-3 | 1.94 E-3 | 349 E-6 | 235 E-6 | 91.0 E-6 | 72.6 E-6 | 55.5 E-6 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 0.0 | 537 E-6 | 292 E-6 | 145 E-6 | 128 E-6 | 34.0 E-6 | 37.0 E-6 | 16.2 E-6 | 0.0 | 536 E-6 | 292 E-6 | 145 E-6 | 128 E-6 | 33.8 E-6 | 36.9 E-6 | 16.1 E-6 | 0 | 10.1 | 15.3 | 40.8 | 54.4 | 37.3 | 50.7 | 29.3 | 1.000 | 1.113 | 1.180 | 1.690 | 2.192 | 1.594 | 2.029 | 1.414 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 |