# Build environment: Intel Compiler Version 18.0.1.163 Build 20171018,
# gcc v4.8.5, Python v2.7.5
# Load the Intel 2018 update 1 and CUDA 8.0.61 toolchains through the
# environment-modules system.
module load intel/2018_u1
module load cuda/8.0.61
# If you want to use gcc 6.3.0 to work with Intel Compiler 18
module load gcc/6.3.0
# Sanity check: MKLROOT is used further down to locate the MKL ScaLAPACK libraries.
echo "$MKLROOT"
# Output recorded on the original build host (kept as a comment so it is not
# executed as a command):
#   /pkg/intel/2018_u1/compilers_and_libraries_2018.1.163/linux/mkl
# Fetch the NWChem sources into ./nwchem-6.8.1 using exactly ONE of two methods.
# The two methods are alternatives: running both fails, because the second one
# finds nwchem-6.8.1 already populated.
#
#   NWCHEM_SRC_MODE=git  (default) clone the 6.8.1 hotfix branch, which supports
#                        multiple CUDA cards within one single node.
#                        Thanks to Edoardo Aprà!
#   NWCHEM_SRC_MODE=zip  unpack the master release archive; sufficient if CUDA is
#                        not being compiled.
NWCHEM_SRC_MODE="${NWCHEM_SRC_MODE:-git}"
cd /home/molpro/src
# -p keeps the step re-runnable when the directory already exists.
mkdir -p nwchem-6.8.1.opa.scalapack.cuda-tce
cd nwchem-6.8.1.opa.scalapack.cuda-tce
if [ "$NWCHEM_SRC_MODE" = "zip" ]; then
  unzip ../nwchem-6.8.1-20180206.zip
  mv nwchem-master nwchem-6.8.1
else
  git clone -b hotfix/release-6-8 https://github.com/nwchemgit/nwchem nwchem-6.8.1
fi
# The steps from here on follow Jeff Hammond's guide:
# https://github.com/jeffhammond/HPCInfo/blob/master/ofi/NWChem-OPA.md
# Minimum tool versions required:
#   M4_VERSION=1.4.17
#   LIBTOOL_VERSION=2.4.4
#   AUTOCONF_VERSION=2.69
#   AUTOMAKE_VERSION=1.15
# Put $HOME/local/bin first on PATH and define the root directory under which
# all dependencies and NWChem itself are built.
NWCHEM_ROOT=/home/molpro/src/nwchem-6.8.1.opa.scalapack.cuda-tce
PATH="$HOME/local/bin:$PATH"
export PATH NWCHEM_ROOT
cd "$NWCHEM_ROOT"
# libfabric: download the current master branch, configure it with the psm2
# provider enabled and the udp, sockets and rxm providers disabled, then build
# and install it into $NWCHEM_ROOT/deps.
# The archive is saved under an explicit name: a generic "master.zip" collides
# with other GitHub archive downloads made in the same directory.
wget -O libfabric-master.zip https://github.com/ofiwg/libfabric/archive/master.zip
unzip libfabric-master.zip
mv libfabric-master libfabric
cd "$NWCHEM_ROOT/libfabric/"
./autogen.sh
mkdir -p "$NWCHEM_ROOT/libfabric/build"
cd "$NWCHEM_ROOT/libfabric/build"
../configure CC=icc CXX=icpc --enable-psm2 --disable-udp --disable-sockets --disable-rxm \
  --prefix="$NWCHEM_ROOT/deps"
# Build in the foreground: a backgrounded build would race with the install
# step. Install only if the build succeeded; build output goes to make.log.
make -j 16 > make.log 2>&1 && make install
cd "$NWCHEM_ROOT"
# Intel MPI: point MPI_ROOT at the intel64 tree below $I_MPI_ROOT and export
# the C, C++ and Fortran compiler wrappers found in its bin directory.
# NOTE(review): I_MPI_ROOT is presumably set by the intel module - confirm.
MPI_ROOT="$I_MPI_ROOT/intel64"
MPICC="$MPI_ROOT/bin/mpiicc"
MPICXX="$MPI_ROOT/bin/mpiicpc"
MPIFC="$MPI_ROOT/bin/mpiifort"
export MPI_ROOT MPICC MPICXX MPIFC
# Casper: clone, fetch its submodules, then build with the Intel MPI C wrapper
# and install into $NWCHEM_ROOT/deps.
cd "$NWCHEM_ROOT"
git clone https://github.com/pmodels/casper
cd "$NWCHEM_ROOT/casper"
# Ming Si's instructions (two separate commands; they must not be fused on one
# line):
git submodule init
git submodule update
# Fallback to Jeff's instruction:
./autogen.sh
mkdir -p "$NWCHEM_ROOT/casper/build"
cd "$NWCHEM_ROOT/casper/build"
../configure CC="$MPICC" --prefix="$NWCHEM_ROOT/deps"
# Build in the foreground so that the install step cannot start before the
# build has finished; install only on success. Build output goes to make.log.
make -j 16 > make.log 2>&1 && make install
cd "$NWCHEM_ROOT"
# ARMCI-MPI: obtain the sources (git clone, falling back to the GitHub zip
# archive), then build and install into $NWCHEM_ROOT/deps.
# An explicit if/else is used because "a || b && c" groups as "(a || b) && c",
# which would run unzip even after a successful clone.
if ! git clone --depth 10 https://github.com/jeffhammond/armci-mpi.git; then
  # The archive unpacks to armci-mpi-master; rename it so the steps below find
  # the same directory name as a clone would produce.
  wget -O armci-mpi-master.zip https://github.com/jeffhammond/armci-mpi/archive/master.zip &&
    unzip armci-mpi-master.zip &&
    mv armci-mpi-master armci-mpi
fi
cd armci-mpi
./autogen.sh
mkdir -p "$NWCHEM_ROOT/armci-mpi/build"
cd "$NWCHEM_ROOT/armci-mpi/build"
../configure MPICC="$MPICC" MPIEXEC="$MPI_ROOT/bin/mpirun" --enable-win-allocate --enable-explicit-progress \
  --prefix="$NWCHEM_ROOT/deps"
# Observed on the original build (the two --enable options were not recognized):
# configure: WARNING: unrecognized options: --enable-win-allocate, --enable-explicit-progress
# Build in the foreground so that the install step cannot start before the
# build has finished; install only on success. Build output goes to make.log.
make -j 16 > make.log 2>&1 && make install
# Test ARMCI-MPI: build the check programs first, then run the test suite
# through "mpirun -n 2". Both outputs are also captured in log files.
make -j8 checkprogs | tee checkprogs.log
make MPIEXEC="$MPI_ROOT/bin/mpirun -n 2" check | tee check-mpiexec.log
# Not loading any mvapich2 modules eliminates the following three failures:
# FAIL: 3
# FAIL: tests/test_malloc
# FAIL: tests/test_malloc_irreg
# FAIL: tests/contrib/armci-test
# Continue with the NWChem build itself.
# A gcc newer than version 5 (such as 6.3.0) cannot compile CUDA's memory.cu;
# set CUDA="nvcc --compiler-bindir=<path to older GCC>" in the sourced bashrc so
# that nvcc uses the older gcc.
cd "$NWCHEM_ROOT"
source ../bashrc.nwchem.opa.scalapack.cuda-tce
cd "$NWCHEM_TOP/src"
# nwchem_config must complete before the main build starts, so both steps run
# in the foreground and the build is attempted only if configuration succeeded.
# Output is captured in nwchem_config.log and make.log respectively.
make nwchem_config > nwchem_config.log 2>&1 && make -j 32 > make.log 2>&1
# End of NWChem compilation #
# Refer to Jeff Hammond's page to set up the mpirun script so that it works with Casper.
# Contents of bashrc.nwchem.opa.scalapack.cuda-tce
# Source tree locations, build target, Python embedding and module selection.
NWCHEM_ROOT=/home/molpro/src/nwchem-6.8.1.opa.scalapack.cuda-tce
NWCHEM_TOP="${NWCHEM_ROOT}/nwchem-6.8.1"
NWCHEM_TARGET=LINUX64
USE_PYTHONCONFIG=y
USE_PYTHON64=y
PYTHONVERSION=2.7
PYTHONHOME=/usr
NWCHEM_MODULES="all python"
MRCC_METHODS=TRUE
export NWCHEM_ROOT NWCHEM_TOP NWCHEM_TARGET
export USE_PYTHONCONFIG USE_PYTHON64 PYTHONVERSION PYTHONHOME
export NWCHEM_MODULES MRCC_METHODS
# CUDA settings for the TCE module.
# nvcc is told to use the host compiler found in /usr/bin.
export CUDA="nvcc --compiler-bindir=/usr/bin"
export TCE_CUDA=Y
# Installation prefix of the CUDA 8.0.61 toolkit, shared by the library and
# include paths below.
cuda_root=/pkg/cuda/8.0.61
# Target GPU architecture, defined once so that CUDA_FLAGS and CUDA_ARCH cannot
# drift apart. nvcc architecture names carry an underscore ("sm_60"); "sm60" is
# not a valid value.
cuda_sm=sm_60
export CUDA_LIBS="-L${cuda_root}/lib64 -lcudart -lcublas -lstdc++"
export CUDA_FLAGS="-arch ${cuda_sm} "
export CUDA_ARCH="-arch ${cuda_sm}"
export CUDA_INCLUDE="-I. -I${cuda_root}/include"
# OpenMP, external ARMCI and MPI settings.
# MPI_DIR, MPICH_LIBS and SYS_LIBS are helper variables used only to assemble
# the exported values; they are not exported themselves.
# MPI_ROOT must already be set in the shell that sources this file.
MPI_DIR=${MPI_ROOT}
MPICH_LIBS="-lmpifort -lmpi"
SYS_LIBS="-ldl -lrt -lpthread -static-intel"

USE_OPENMP=T
ARMCI_NETWORK=ARMCI
EXTERNAL_ARMCI_PATH=${NWCHEM_ROOT}/deps
export USE_OPENMP ARMCI_NETWORK EXTERNAL_ARMCI_PATH

USE_MPI=y
USE_MPIF=y
USE_MPIF4=y
export USE_MPI USE_MPIF USE_MPIF4

MPI_LIB="${MPI_DIR}/lib"
MPI_INCLUDE="${MPI_DIR}/include"
# Link line: MPI library directory (also recorded as an rpath), then the MPI
# and system libraries.
LIBMPI="-L${MPI_DIR}/lib -Wl,-rpath -Wl,${MPI_DIR}/lib ${MPICH_LIBS} ${SYS_LIBS}"
export MPI_LIB MPI_INCLUDE LIBMPI
# Intel compilers for C, C++ and Fortran.
CC=icc
CXX=icpc
FC=ifort
F77=ifort
export CC CXX FC F77

# BLAS and LAPACK come from MKL through the compiler's -mkl option; the
# *_SIZE=8 settings declare 8-byte integers for each library interface.
BLAS_SIZE=8
BLASOPT="-mkl=parallel -qopenmp"
LAPACK_SIZE=8
LAPACK_LIB="$BLASOPT"
LAPACK_LIBS="$BLASOPT"
export BLAS_SIZE BLASOPT LAPACK_SIZE LAPACK_LIB LAPACK_LIBS

# ScaLAPACK: explicit MKL ILP64 link line below $MKLROOT/lib/intel64.
USE_SCALAPACK=y
SCALAPACK_SIZE=8
SCALAPACK="-L${MKLROOT}/lib/intel64 -lmkl_scalapack_ilp64 -lmkl_intel_ilp64 -lmkl_intel_thread -lmkl_core -lmkl_blacs_intelmpi_ilp64 -liomp5 -lpthread -lm -ldl"
export USE_SCALAPACK SCALAPACK_SIZE SCALAPACK