********************************************************************************
MAQAO 2025.1.2 - ad4b42c12cfbc289a7a711f3ded92abe2eb90c0a::20250917-142411 || 2025/09/17
/beegfs/hackathon/users/eoseret/MAQAO_ad4b42/bin/maqao oneview -R1 WP=/home/eoseret/MAQAO_src/src/maqao/libprompt/libprompt_icx.so -WS -c=/beegfs/hackathon/users/eoseret/qaas_runs_test/175-950-2189/intel/llama.cpp/run/oneview_runs/multicore/icx_3/config.json --with-FLOPS object-coverage-threshold=0.1 lprof_params=btm=fp --replace xp=/beegfs/hackathon/users/eoseret/qaas_runs_test/175-950-2189/intel/llama.cpp/run/oneview_runs/multicore/icx_3/oneview_results_1759511881 -of=html
CPY: [true] /beegfs/hackathon/users/eoseret/qaas_runs_test/175-950-2189/intel/llama.cpp/run/binaries/icx_3/exec --> /beegfs/hackathon/users/eoseret/qaas_runs_test/175-950-2189/intel/llama.cpp/run/oneview_runs/multicore/icx_3/oneview_results_1759511881/binaries/exec
CPY: [true] /beegfs/hackathon/users/eoseret/qaas_runs_test/175-950-2189/intel/llama.cpp/build/llama.cpp/../icx_3/bin/libggml-base.so --> /beegfs/hackathon/users/eoseret/qaas_runs_test/175-950-2189/intel/llama.cpp/run/oneview_runs/multicore/icx_3/oneview_results_1759511881/libs/libggml-base.so
CPY: [true] /beegfs/hackathon/users/eoseret/qaas_runs_test/175-950-2189/intel/llama.cpp/build/llama.cpp/../icx_3/bin/libggml-blas.so --> /beegfs/hackathon/users/eoseret/qaas_runs_test/175-950-2189/intel/llama.cpp/run/oneview_runs/multicore/icx_3/oneview_results_1759511881/libs/libggml-blas.so
CPY: [true] /beegfs/hackathon/users/eoseret/qaas_runs_test/175-950-2189/intel/llama.cpp/build/llama.cpp/../icx_3/bin/libggml-cpu.so --> /beegfs/hackathon/users/eoseret/qaas_runs_test/175-950-2189/intel/llama.cpp/run/oneview_runs/multicore/icx_3/oneview_results_1759511881/libs/libggml-cpu.so
CPY: [true] /beegfs/hackathon/users/eoseret/qaas_runs_test/175-950-2189/intel/llama.cpp/build/llama.cpp/../icx_3/bin/libggml.so --> /beegfs/hackathon/users/eoseret/qaas_runs_test/175-950-2189/intel/llama.cpp/run/oneview_runs/multicore/icx_3/oneview_results_1759511881/libs/libggml.so
CPY: [true] /beegfs/hackathon/users/eoseret/qaas_runs_test/175-950-2189/intel/llama.cpp/build/llama.cpp/../icx_3/bin/libllama.so --> /beegfs/hackathon/users/eoseret/qaas_runs_test/175-950-2189/intel/llama.cpp/run/oneview_runs/multicore/icx_3/oneview_results_1759511881/libs/libllama.so
CMD: OMP_NUM_THREADS=8 I_MPI_PIN_ORDER=bunch OMP_DISPLAY_AFFINITY=TRUE OMP_PROC_BIND=spread OMP_AFFINITY_FORMAT='OMP: pid %P tid %i thread %n bound to OS proc set {%A}' OMP_DISPLAY_ENV=TRUE I_MPI_PIN_DOMAIN=auto I_MPI_DEBUG=4 OMP_PLACES=threads /beegfs/hackathon/users/eoseret/MAQAO_ad4b42/bin/maqao lprof _caller=oneview btm=fp --xp="/beegfs/hackathon/users/eoseret/qaas_runs_test/175-950-2189/intel/llama.cpp/run/oneview_runs/multicore/icx_3/oneview_results_1759511881/tools/lprof_npsu_run_0" --mpi-command="mpirun -n 1 " --collect-CPU-time-intervals -p=SSE_AVX_FLOP --collect-topology tpp=8 -ldi=libggml-base.so,libggml-blas.so,libggml-cpu.so,libggml.so,libllama.so -- /beegfs/hackathon/users/eoseret/qaas_runs_test/175-950-2189/intel/llama.cpp/run/oneview_runs/multicore/icx_3/oneview_results_1759511881/binaries/exec -m meta-llama-3.1-8b-instruct-Q8_0.gguf -no-cnv -t 8 -n 512 -p \"what is a LLM?\" --seed 0
CMD: OMP_NUM_THREADS=64 I_MPI_PIN_ORDER=bunch OMP_DISPLAY_AFFINITY=TRUE OMP_PROC_BIND=spread OMP_AFFINITY_FORMAT='OMP: pid %P tid %i thread %n bound to OS proc set {%A}' OMP_DISPLAY_ENV=TRUE I_MPI_PIN_DOMAIN=auto I_MPI_DEBUG=4 OMP_PLACES=threads /beegfs/hackathon/users/eoseret/MAQAO_ad4b42/bin/maqao lprof _caller=oneview btm=fp --xp="/beegfs/hackathon/users/eoseret/qaas_runs_test/175-950-2189/intel/llama.cpp/run/oneview_runs/multicore/icx_3/oneview_results_1759511881/tools/lprof_npsu_run_1" --mpi-command="mpirun -n 1 " --collect-CPU-time-intervals -p=SSE_AVX_FLOP --collect-topology tpp=64 -ldi=libggml-base.so,libggml-blas.so,libggml-cpu.so,libggml.so,libllama.so -- /beegfs/hackathon/users/eoseret/qaas_runs_test/175-950-2189/intel/llama.cpp/run/oneview_runs/multicore/icx_3/oneview_results_1759511881/binaries/exec -m meta-llama-3.1-8b-instruct-Q8_0.gguf -no-cnv -t 64 -n 512 -p \"what is a LLM?\" --seed 0
CMD: OMP_NUM_THREADS=96 I_MPI_PIN_ORDER=bunch OMP_DISPLAY_AFFINITY=TRUE OMP_PROC_BIND=spread OMP_AFFINITY_FORMAT='OMP: pid %P tid %i thread %n bound to OS proc set {%A}' OMP_DISPLAY_ENV=TRUE I_MPI_PIN_DOMAIN=auto I_MPI_DEBUG=4 OMP_PLACES=threads /beegfs/hackathon/users/eoseret/MAQAO_ad4b42/bin/maqao lprof _caller=oneview btm=fp --xp="/beegfs/hackathon/users/eoseret/qaas_runs_test/175-950-2189/intel/llama.cpp/run/oneview_runs/multicore/icx_3/oneview_results_1759511881/tools/lprof_npsu_run_2" --mpi-command="mpirun -n 1 " --collect-CPU-time-intervals -p=SSE_AVX_FLOP --collect-topology tpp=96 -ldi=libggml-base.so,libggml-blas.so,libggml-cpu.so,libggml.so,libllama.so -- /beegfs/hackathon/users/eoseret/qaas_runs_test/175-950-2189/intel/llama.cpp/run/oneview_runs/multicore/icx_3/oneview_results_1759511881/binaries/exec -m meta-llama-3.1-8b-instruct-Q8_0.gguf -no-cnv -t 96 -n 512 -p \"what is a LLM?\" --seed 0
CMD: OMP_NUM_THREADS=128 I_MPI_PIN_ORDER=bunch OMP_DISPLAY_AFFINITY=TRUE OMP_PROC_BIND=spread OMP_AFFINITY_FORMAT='OMP: pid %P tid %i thread %n bound to OS proc set {%A}' OMP_DISPLAY_ENV=TRUE I_MPI_PIN_DOMAIN=auto I_MPI_DEBUG=4 OMP_PLACES=threads /beegfs/hackathon/users/eoseret/MAQAO_ad4b42/bin/maqao lprof _caller=oneview btm=fp --xp="/beegfs/hackathon/users/eoseret/qaas_runs_test/175-950-2189/intel/llama.cpp/run/oneview_runs/multicore/icx_3/oneview_results_1759511881/tools/lprof_npsu_run_3" --mpi-command="mpirun -n 1 " --collect-CPU-time-intervals -p=SSE_AVX_FLOP --collect-topology tpp=128 -ldi=libggml-base.so,libggml-blas.so,libggml-cpu.so,libggml.so,libllama.so -- /beegfs/hackathon/users/eoseret/qaas_runs_test/175-950-2189/intel/llama.cpp/run/oneview_runs/multicore/icx_3/oneview_results_1759511881/binaries/exec -m meta-llama-3.1-8b-instruct-Q8_0.gguf -no-cnv -t 128 -n 512 -p \"what is a LLM?\" --seed 0
CMD: OMP_NUM_THREADS=160 I_MPI_PIN_ORDER=bunch OMP_DISPLAY_AFFINITY=TRUE OMP_PROC_BIND=spread OMP_AFFINITY_FORMAT='OMP: pid %P tid %i thread %n bound to OS proc set {%A}' OMP_DISPLAY_ENV=TRUE I_MPI_PIN_DOMAIN=auto I_MPI_DEBUG=4 OMP_PLACES=threads /beegfs/hackathon/users/eoseret/MAQAO_ad4b42/bin/maqao lprof _caller=oneview btm=fp --xp="/beegfs/hackathon/users/eoseret/qaas_runs_test/175-950-2189/intel/llama.cpp/run/oneview_runs/multicore/icx_3/oneview_results_1759511881/tools/lprof_npsu_run_4" --mpi-command="mpirun -n 1 " --collect-CPU-time-intervals -p=SSE_AVX_FLOP --collect-topology tpp=160 -ldi=libggml-base.so,libggml-blas.so,libggml-cpu.so,libggml.so,libllama.so -- /beegfs/hackathon/users/eoseret/qaas_runs_test/175-950-2189/intel/llama.cpp/run/oneview_runs/multicore/icx_3/oneview_results_1759511881/binaries/exec -m meta-llama-3.1-8b-instruct-Q8_0.gguf -no-cnv -t 160 -n 512 -p \"what is a LLM?\" --seed 0
CMD: OMP_NUM_THREADS=192 I_MPI_PIN_ORDER=bunch OMP_DISPLAY_AFFINITY=TRUE OMP_PROC_BIND=spread OMP_AFFINITY_FORMAT='OMP: pid %P tid %i thread %n bound to OS proc set {%A}' OMP_DISPLAY_ENV=TRUE I_MPI_PIN_DOMAIN=auto I_MPI_DEBUG=4 OMP_PLACES=threads /beegfs/hackathon/users/eoseret/MAQAO_ad4b42/bin/maqao lprof _caller=oneview btm=fp --xp="/beegfs/hackathon/users/eoseret/qaas_runs_test/175-950-2189/intel/llama.cpp/run/oneview_runs/multicore/icx_3/oneview_results_1759511881/tools/lprof_npsu_run_5" --mpi-command="mpirun -n 1 " --collect-CPU-time-intervals -p=SSE_AVX_FLOP --collect-topology tpp=192 -ldi=libggml-base.so,libggml-blas.so,libggml-cpu.so,libggml.so,libllama.so -- /beegfs/hackathon/users/eoseret/qaas_runs_test/175-950-2189/intel/llama.cpp/run/oneview_runs/multicore/icx_3/oneview_results_1759511881/binaries/exec -m meta-llama-3.1-8b-instruct-Q8_0.gguf -no-cnv -t 192 -n 512 -p \"what is a LLM?\" --seed 0
In run 1x8, 30 loops were discarded from static analysis because their coverage
is lower than the object_coverage_threshold value (0.1%).
That represents 0.2635627577547% of the execution time. To include them, change the value
in the experiment directory configuration file, then rerun the command with the additional parameter
--force-static-analysis
53 functions were discarded from static analysis because their coverage
is lower than the object_coverage_threshold value (0.1%).
That represents 0.35238475818187% of the execution time. To include them, change the value
in the experiment directory configuration file, then rerun the command with the additional parameter
--force-static-analysis
In run 1x64, 39 loops were discarded from static analysis because their coverage
is lower than the object_coverage_threshold value (0.1%).
That represents 0.13707596040331% of the execution time. To include them, change the value
in the experiment directory configuration file, then rerun the command with the additional parameter
--force-static-analysis
40 functions were discarded from static analysis because their coverage
is lower than the object_coverage_threshold value (0.1%).
That represents 0.016088590840809% of the execution time. To include them, change the value
in the experiment directory configuration file, then rerun the command with the additional parameter
--force-static-analysis
In run 1x96, 38 loops were discarded from static analysis because their coverage
is lower than the object_coverage_threshold value (0.1%).
That represents 0.098004994826624% of the execution time. To include them, change the value
in the experiment directory configuration file, then rerun the command with the additional parameter
--force-static-analysis
44 functions were discarded from static analysis because their coverage
is lower than the object_coverage_threshold value (0.1%).
That represents 0.026602395708323% of the execution time. To include them, change the value
in the experiment directory configuration file, then rerun the command with the additional parameter
--force-static-analysis
In run 1x128, 38 loops were discarded from static analysis because their coverage
is lower than the object_coverage_threshold value (0.1%).
That represents 0.11024425007054% of the execution time. To include them, change the value
in the experiment directory configuration file, then rerun the command with the additional parameter
--force-static-analysis
12 functions were discarded from static analysis because their coverage
is lower than the object_coverage_threshold value (0.1%).
That represents 0.0039424388669431% of the execution time. To include them, change the value
in the experiment directory configuration file, then rerun the command with the additional parameter
--force-static-analysis
In run 1x160, 39 loops were discarded from static analysis because their coverage
is lower than the object_coverage_threshold value (0.1%).
That represents 0.12571445824142% of the execution time. To include them, change the value
in the experiment directory configuration file, then rerun the command with the additional parameter
--force-static-analysis
7 functions were discarded from static analysis because their coverage
is lower than the object_coverage_threshold value (0.1%).
That represents 0.00072736410220385% of the execution time. To include them, change the value
in the experiment directory configuration file, then rerun the command with the additional parameter
--force-static-analysis
In run 1x192, 44 loops were discarded from static analysis because their coverage
is lower than the object_coverage_threshold value (0.1%).
That represents 0.12346417633671% of the execution time. To include them, change the value
in the experiment directory configuration file, then rerun the command with the additional parameter
--force-static-analysis
7 functions were discarded from static analysis because their coverage
is lower than the object_coverage_threshold value (0.1%).
That represents 0.00077371572115226% of the execution time. To include them, change the value
in the experiment directory configuration file, then rerun the command with the additional parameter
--force-static-analysis
CMD: cd "/beegfs/hackathon/users/eoseret/qaas_runs_test/175-950-2189/intel/llama.cpp/run/oneview_runs/multicore/icx_3/oneview_run_1759511881"; OMP_TOOL_LIBRARIES=/home/eoseret/MAQAO_src/src/maqao/libprompt/libprompt_icx.so TARGET_PARALLEL_SECTION=ALL OMP_NUM_THREADS=8 I_MPI_PIN_ORDER=bunch OMP_DISPLAY_AFFINITY=TRUE OMP_PROC_BIND=spread OMP_AFFINITY_FORMAT='OMP: pid %P tid %i thread %n bound to OS proc set {%A}' OMP_DISPLAY_ENV=TRUE I_MPI_PIN_DOMAIN=auto I_MPI_DEBUG=4 OMP_PLACES=threads mpirun -n 1 /beegfs/hackathon/users/eoseret/qaas_runs_test/175-950-2189/intel/llama.cpp/run/oneview_runs/multicore/icx_3/oneview_results_1759511881/binaries/exec -m meta-llama-3.1-8b-instruct-Q8_0.gguf -no-cnv -t 8 -n 512 -p "what is a LLM?" --seed 0
CMD: cd "/beegfs/hackathon/users/eoseret/qaas_runs_test/175-950-2189/intel/llama.cpp/run/oneview_runs/multicore/icx_3/oneview_run_1759511881"; OMP_TOOL_LIBRARIES=/home/eoseret/MAQAO_src/src/maqao/libprompt/libprompt_icx.so TARGET_PARALLEL_SECTION=ALL OMP_NUM_THREADS=64 I_MPI_PIN_ORDER=bunch OMP_DISPLAY_AFFINITY=TRUE OMP_PROC_BIND=spread OMP_AFFINITY_FORMAT='OMP: pid %P tid %i thread %n bound to OS proc set {%A}' OMP_DISPLAY_ENV=TRUE I_MPI_PIN_DOMAIN=auto I_MPI_DEBUG=4 OMP_PLACES=threads mpirun -n 1 /beegfs/hackathon/users/eoseret/qaas_runs_test/175-950-2189/intel/llama.cpp/run/oneview_runs/multicore/icx_3/oneview_results_1759511881/binaries/exec -m meta-llama-3.1-8b-instruct-Q8_0.gguf -no-cnv -t 64 -n 512 -p "what is a LLM?" --seed 0
CMD: cd "/beegfs/hackathon/users/eoseret/qaas_runs_test/175-950-2189/intel/llama.cpp/run/oneview_runs/multicore/icx_3/oneview_run_1759511881"; OMP_TOOL_LIBRARIES=/home/eoseret/MAQAO_src/src/maqao/libprompt/libprompt_icx.so TARGET_PARALLEL_SECTION=ALL OMP_NUM_THREADS=96 I_MPI_PIN_ORDER=bunch OMP_DISPLAY_AFFINITY=TRUE OMP_PROC_BIND=spread OMP_AFFINITY_FORMAT='OMP: pid %P tid %i thread %n bound to OS proc set {%A}' OMP_DISPLAY_ENV=TRUE I_MPI_PIN_DOMAIN=auto I_MPI_DEBUG=4 OMP_PLACES=threads mpirun -n 1 /beegfs/hackathon/users/eoseret/qaas_runs_test/175-950-2189/intel/llama.cpp/run/oneview_runs/multicore/icx_3/oneview_results_1759511881/binaries/exec -m meta-llama-3.1-8b-instruct-Q8_0.gguf -no-cnv -t 96 -n 512 -p "what is a LLM?" --seed 0
CMD: cd "/beegfs/hackathon/users/eoseret/qaas_runs_test/175-950-2189/intel/llama.cpp/run/oneview_runs/multicore/icx_3/oneview_run_1759511881"; OMP_TOOL_LIBRARIES=/home/eoseret/MAQAO_src/src/maqao/libprompt/libprompt_icx.so TARGET_PARALLEL_SECTION=ALL OMP_NUM_THREADS=128 I_MPI_PIN_ORDER=bunch OMP_DISPLAY_AFFINITY=TRUE OMP_PROC_BIND=spread OMP_AFFINITY_FORMAT='OMP: pid %P tid %i thread %n bound to OS proc set {%A}' OMP_DISPLAY_ENV=TRUE I_MPI_PIN_DOMAIN=auto I_MPI_DEBUG=4 OMP_PLACES=threads mpirun -n 1 /beegfs/hackathon/users/eoseret/qaas_runs_test/175-950-2189/intel/llama.cpp/run/oneview_runs/multicore/icx_3/oneview_results_1759511881/binaries/exec -m meta-llama-3.1-8b-instruct-Q8_0.gguf -no-cnv -t 128 -n 512 -p "what is a LLM?" --seed 0
CMD: cd "/beegfs/hackathon/users/eoseret/qaas_runs_test/175-950-2189/intel/llama.cpp/run/oneview_runs/multicore/icx_3/oneview_run_1759511881"; OMP_TOOL_LIBRARIES=/home/eoseret/MAQAO_src/src/maqao/libprompt/libprompt_icx.so TARGET_PARALLEL_SECTION=ALL OMP_NUM_THREADS=160 I_MPI_PIN_ORDER=bunch OMP_DISPLAY_AFFINITY=TRUE OMP_PROC_BIND=spread OMP_AFFINITY_FORMAT='OMP: pid %P tid %i thread %n bound to OS proc set {%A}' OMP_DISPLAY_ENV=TRUE I_MPI_PIN_DOMAIN=auto I_MPI_DEBUG=4 OMP_PLACES=threads mpirun -n 1 /beegfs/hackathon/users/eoseret/qaas_runs_test/175-950-2189/intel/llama.cpp/run/oneview_runs/multicore/icx_3/oneview_results_1759511881/binaries/exec -m meta-llama-3.1-8b-instruct-Q8_0.gguf -no-cnv -t 160 -n 512 -p "what is a LLM?" --seed 0
CMD: cd "/beegfs/hackathon/users/eoseret/qaas_runs_test/175-950-2189/intel/llama.cpp/run/oneview_runs/multicore/icx_3/oneview_run_1759511881"; OMP_TOOL_LIBRARIES=/home/eoseret/MAQAO_src/src/maqao/libprompt/libprompt_icx.so TARGET_PARALLEL_SECTION=ALL OMP_NUM_THREADS=192 I_MPI_PIN_ORDER=bunch OMP_DISPLAY_AFFINITY=TRUE OMP_PROC_BIND=spread OMP_AFFINITY_FORMAT='OMP: pid %P tid %i thread %n bound to OS proc set {%A}' OMP_DISPLAY_ENV=TRUE I_MPI_PIN_DOMAIN=auto I_MPI_DEBUG=4 OMP_PLACES=threads mpirun -n 1 /beegfs/hackathon/users/eoseret/qaas_runs_test/175-950-2189/intel/llama.cpp/run/oneview_runs/multicore/icx_3/oneview_results_1759511881/binaries/exec -m meta-llama-3.1-8b-instruct-Q8_0.gguf -no-cnv -t 192 -n 512 -p "what is a LLM?" --seed 0