#!/bin/bash
#SBATCH --job-name=lumi_access
#SBATCH --ntasks=8
#SBATCH --ntasks-per-node=8
#SBATCH --time=01:00:00
#SBATCH --partition standard-g
#SBATCH --account=project_XXXXXXXXX
#SBATCH --gpus-per-node=8

# Software environment for the job. Modules are loaded one at a time, in the
# listed order (same order as before); presumably the stack/partition modules
# must come first so the later ones become visible -- confirm on the system.
# Abort immediately if any module fails to load: continuing would only fail
# later, inside the container launch, with a far less obvious error.
required_modules=(
  LUMI/22.08
  partition/G
  singularity-bindings
  aws-ofi-rccl
  OpenMPI/4.1.3-cpeGNU-22.08
)

for required_module in "${required_modules[@]}"; do
  if ! module load "${required_module}"; then
    printf 'error: failed to load module %s\n' "${required_module}" >&2
    exit 1
  fi
done

# Project scratch directory; bind-mounted into the container as /work by the
# launch command below. Note the trailing slash is part of the value.
export SCRATCH=/scratch/project_XXXXXXXXX/

# Collective-communication library settings (debug verbosity and the network
# interface name/prefix to use).
export NCCL_DEBUG=INFO
export NCCL_SOCKET_IFNAME=hsn

# Per-user, per-job MIOpen database/cache location under /tmp.
# NOTE(review): this script does not create the directory -- confirm MIOpen
# creates it on demand.
export MIOPEN_USER_DB_PATH="/tmp/${USER}-miopen-cache-${SLURM_JOB_ID}"
export MIOPEN_CUSTOM_CACHE_DIR="${MIOPEN_USER_DB_PATH}"

# CXI / libfabric provider switches (fork safety, huge-page handling).
export CXI_FORK_SAFE=1
export CXI_FORK_SAFE_HP=1
export FI_CXI_DISABLE_CQ_HUGETLB=1

# Library search path injected into the container (SINGULARITYENV_ prefix).
# EBROOTAWSMINOFIMINRCCL is presumably exported by the aws-ofi-rccl module;
# if it is empty the third entry degrades to "/lib".
# Any pre-existing value is appended only when it is non-empty: an
# unconditional ":$VAR" suffix leaves a trailing ':' when VAR is unset, and the
# dynamic loader interprets an empty path entry as the current directory.
export SINGULARITYENV_LD_LIBRARY_PATH="/openmpi/lib:/opt/rocm-5.4.1/lib:${EBROOTAWSMINOFIMINRCCL}/lib:/opt/cray/xpmem/2.4.4-2.3_9.1__gff0e1d9.shasta/lib64${SINGULARITYENV_LD_LIBRARY_PATH:+:${SINGULARITYENV_LD_LIBRARY_PATH}}"

# Launch one containerised Python process per MPI rank.
#
# container_image: the .sif file under $SCRATCH. A trailing slash on SCRATCH
#   is stripped so the path does not contain "//".
# container_cmd: runs inside the container. The steps are chained with '&&'
#   so the training script is not started when activating the virtualenv or
#   changing directory fails.
#   NOTE(review): 'horovod_env/bin/activate' is a relative path and is resolved
#   before the 'cd', i.e. against the container's initial working directory --
#   confirm that is where the virtualenv lives.
container_image="${SCRATCH%/}/lumi_access/tensorflow:rocm5.4.1-tf2.10-dev.sif"
container_cmd=". horovod_env/bin/activate && cd /work/lumi_access/ && python keras_horovod_example.py"

# The rank count follows the SLURM allocation (SLURM_NTASKS) and falls back to
# 8, the value requested by '#SBATCH --ntasks=8', when the variable is unset.
mpirun -np "${SLURM_NTASKS:-8}" \
  singularity exec -B"$SCRATCH:/work" "${container_image}" \
  bash -c "${container_cmd}"
