Skip to content

Commit

Permalink
Merge branch 'master' into node_awareness
Browse files Browse the repository at this point in the history
  • Loading branch information
solomonik committed Nov 9, 2023
2 parents f04f035 + 20e7853 commit 917bcb8
Show file tree
Hide file tree
Showing 40 changed files with 765 additions and 549 deletions.
130 changes: 130 additions & 0 deletions .github/workflows/autotest.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
# Based on Nick Naso's cpp.yml workflow, https://gist.github.com/NickNaso/0d478f1481686d5bcc868cac06620a60

on:
push:
pull_request:
release:

jobs:
build_cpp:
name: ${{ matrix.config.name }}
runs-on: ${{ matrix.config.os }}
strategy:
fail-fast: false
matrix:
config:
- {
name: "Ubuntu_Latest_GCC",
os: ubuntu-latest,
artifact: "ubuntu_gcc.7z",
build_type: "Release",
cc: "gcc",
cxx: "g++",
archiver: "7z a",
}
- {
name: "Ubuntu_GCC_9",
os: ubuntu-latest,
artifact: "ubuntu_gcc9.7z",
build_type: "Release",
cc: "gcc",
cxx: "g++",
archiver: "7z a",
}
- {
name: "macOS Latest Clang",
os: macos-latest,
artifact: "macos_clang.7z",
build_type: "Release",
cc: "clang",
cxx: "clang++",
archiver: "7za a",
}
build_scalapack: [True, False]
steps:
- uses: actions/checkout@v2

- name: Print env
  # Diagnostic step: echoes the triggering event and the ScaLAPACK matrix axis.
  # build_scalapack is declared as its own matrix axis (a sibling of `config`),
  # so it is read as matrix.build_scalapack; matrix.config.build_scalapack does
  # not exist and always expands to an empty string.
  run: |
    echo github.event.action: ${{ github.event.action }}
    echo github.event_name: ${{ github.event_name }}
    echo matrix.build_scalapack
    echo ${{ matrix.build_scalapack }}
- name: Install dependencies on ubuntu
if: startsWith(matrix.config.name, 'Ubuntu')
run: |
sudo apt-get update;
sudo apt install -y g++ gfortran git make libblas-dev liblapack-dev mpich
- name: Install dependencies on macos
if: startsWith(matrix.config.os, 'macos')
run: |
brew install mpich
# Exactly one of the two configure steps below must run per job.
# build_scalapack is a matrix axis of its own (a sibling of `config`), so the
# conditions read matrix.build_scalapack. The former matrix.config.build_scalapack
# is never defined, which skipped the ScaLAPACK build in every job and always
# ran the plain configure instead.
- name: Configure Cyclops and Build Scalapack
  if: matrix.build_scalapack
  shell: bash
  run:
    ./configure CXXFLAGS="-O0" --build-scalapack

# Parenthesized because a YAML scalar may not start with a bare `!`.
- name: Configure Cyclops without Scalapack
  if: ( ! matrix.build_scalapack )
  shell: bash
  run:
    ./configure CXXFLAGS="-O0"


- name: Build Cyclops
shell: bash
run:
make -j4

- name: Build Tests and Test Cyclops C++
run:
make test

- name: Test Cyclops C++ with 2 MPI processes
if: startsWith(matrix.config.name, 'Ubuntu')
shell: bash
run:
export OMP_NUM_THREADS=1;
export MPIR_CVAR_DEVICE_COLLECTIVES=none;
make test2


- name: Build Python Install dependencies on ubuntu
if: startsWith(matrix.config.name, 'Ubuntu')
run: |
sudo apt install -y python3-dev virtualenv;
mkdir envs
- name: Build Python Install dependencies on macos
if: startsWith(matrix.config.os, 'macos')
run: |
brew install virtualenv
- name: Create Python virtual environment and install dependencies via pip
run: |
virtualenv -p python3 ./envs/py3env;
source ./envs/py3env/bin/activate;
pip install numpy cython setuptools
- name: Build Python library
run:
source ./envs/py3env/bin/activate;
make python

- name: Test Cyclops Python
run:
source ./envs/py3env/bin/activate;
make python_test

- name: Test Cyclops Python with 2 MPI processes
if: startsWith(matrix.config.name, 'Ubuntu')
shell: bash
run:
source ./envs/py3env/bin/activate;
export OMP_NUM_THREADS=1;
export MPIR_CVAR_DEVICE_COLLECTIVES=none;
make python_test2
57 changes: 0 additions & 57 deletions .travis.yml

This file was deleted.

2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ pip: $(BDIR)/setup.py $(BDIR)/lib_shared/libctf.so $(PYTHON_SRC_FILES)
cd src_python; \
ln -sf $(BDIR)/setup.py setup.py; \
mkdir -p $(BDIR)/lib_python/ctf && cp ctf/__init__.py $(BDIR)/lib_python/ctf/; \
pip install --force -b $(BDIR)/lib_python/ . --upgrade; \
pip install --force . --upgrade; \
rm setup.py; \
cd ..;

Expand Down
7 changes: 6 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
## Cyclops Tensor Framework (CTF)
[<img src="https://travis-ci.org/cyclops-community/ctf.svg?branch=master">](https://travis-ci.org/cyclops-community/ctf)

Cyclops is a parallel (distributed-memory) numerical library for multidimensional arrays (tensors) in C++ and Python.

Expand Down Expand Up @@ -27,6 +26,12 @@ First, its necessary to run the configure script, which can be set to the approp
```
then execute ./configure with the appropriate options. Successful execution of this script will generate a `config.mk` file and a `setup.py` file, needed for C++ and Python builds, respectively, as well as a how-did-i-configure file with info on how the build was configured. You may modify the `config.mk` and `setup.py` files thereafter; subsequent executions of configure will prompt to overwrite these files.

Note: there is a (now-fixed) [bug](https://github.com/pmodels/mpich/pull/6543) in recent versions of MPICH that causes a segmentation fault in CTF when executing with 2 or more MPI processes.
The bug can be worked around without rebuilding CTF by setting an environment variable as follows:
```sh
export MPIR_CVAR_DEVICE_COLLECTIVES=none
```

### Dependencies and Supplemental Packages

The strict library dependencies of Cyclops are MPI and BLAS libraries.
Expand Down
61 changes: 35 additions & 26 deletions bench/model_trainer.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ void train_off_vec_mat(int64_t n, int64_t m, World & dw, bool sp_A, bool sp_B, b
void train_ttm(int64_t sz, int64_t r, World & dw){
Timer TTM("TTM");
TTM.start();
srand48(dw.rank);
for (int order=2; order<7; order++){
int64_t n = 1;
while (std::pow(n,order) < sz){
Expand Down Expand Up @@ -87,6 +88,7 @@ void train_ttm(int64_t sz, int64_t r, World & dw){
void train_sparse_mttkrp(int64_t sz, int64_t R, World & dw){
Timer sMTTKRP("sMTTKRP");
sMTTKRP.start();
srand48(dw.rank);
for (double sp = .1; sp>.000001; sp*=.25){
int64_t n = (int64_t)cbrt(sz/sp);
int64_t lens[3] = {n, n, n};
Expand Down Expand Up @@ -223,6 +225,7 @@ void train_sps_vec_mat(int64_t n, int64_t m, World & dw, bool sp_A, bool sp_B, b
void train_ccsd(int64_t n, int64_t m, World & dw){
Timer ccsd_t("CCSD");
ccsd_t.start();
srand48(dw.rank);
int nv = sqrt(n);
int no = sqrt(m);
Integrals V(no, nv, dw);
Expand Down Expand Up @@ -271,18 +274,15 @@ void train_world(double dtime, World & dw, double step_size){
}
train_sparse_mttkrp(n*m/8, m, dw);
train_dns_vec_mat(n, m, dw);
train_sps_vec_mat(n-2, m, dw, 0, 0, 0);
train_sps_vec_mat(n-4, m-2, dw, 1, 0, 0);
train_sps_vec_mat(n-1, m-4, dw, 1, 1, 0);
train_sps_vec_mat(n-2, m-3, dw, 1, 1, 1);
train_off_vec_mat(n+7, m-4, dw, 0, 0, 0);
train_off_vec_mat(n-2, m+6, dw, 1, 0, 0);
train_off_vec_mat(n-5, m+2, dw, 1, 1, 0);
train_off_vec_mat(n-3, m-1, dw, 1, 1, 1);
train_ccsd(n/2, m/2, dw);
train_sps_vec_mat(n, m, dw, 0, 0, 0);
train_sps_vec_mat(n, m, dw, 0, 0, 1);
train_off_vec_mat(n, m, dw, 0, 0, 0);
train_off_vec_mat(n, m, dw, 1, 0, 0);
train_off_vec_mat(n, m, dw, 1, 1, 0);
train_off_vec_mat(n, m, dw, 1, 1, 1);
train_ccsd(n, m, dw);
train_sparse_mp3(n,m,dw);
niter++;
// m *= 1.9;
m *= step_size;
n += 2;
ctime = MPI_Wtime() - t_st;
Expand Down Expand Up @@ -310,7 +310,8 @@ void frize(std::set<int> & ps, int p){
}
}

void train_all(double time, bool write_coeff, bool dump_data, std::string coeff_file, std::string data_dir){
void train_all(double time, bool write_coeff, bool dump_data, std::string coeff_file, std::string data_dir,
int num_iterations, double time_jump, int verbose){
World dw(MPI_COMM_WORLD);
int np = dw.np;
int rank;
Expand All @@ -333,13 +334,6 @@ void train_all(double time, bool write_coeff, bool dump_data, std::string coeff_
MPI_Comm_split(dw.comm, color, key, &cm);
World w(cm);

// number of iterations for training
int num_iterations = 5;

// control how much dtime should be increased upon each iteration
// dtime = dtime * time_dump at the end of each iteration
double time_jump = 1.5;

double dtime = (time / (1- 1/time_jump)) / pow(time_jump, num_iterations - 1.0);
for (int i=0; i<num_iterations; i++){
// TODO probably need to adjust
Expand All @@ -351,35 +345,35 @@ void train_all(double time, bool write_coeff, bool dump_data, std::string coeff_
if (color != end_color){
train_world(dtime/5, w, step_size);
CTF_int::update_all_models(cm);
if (rank == 0){
if (rank == 0 && verbose == 1){
printf("Completed training round 1/5\n");
}
}

if (color != end_color)
train_world(dtime/5, w, step_size);
CTF_int::update_all_models(MPI_COMM_WORLD);
if (rank == 0){
if (rank == 0 && verbose == 1){
printf("Completed training round 2/5\n");
}
if (color != end_color){
train_world(dtime/5, w, step_size);
CTF_int::update_all_models(cm);
if (rank == 0){
if (rank == 0 && verbose == 1){
printf("Completed training round 3/5\n");
}
}

if (color != end_color)
train_world(dtime/5, w, step_size);
CTF_int::update_all_models(MPI_COMM_WORLD);
if (rank == 0){
if (rank == 0 && verbose == 1){
printf("Completed training round 4/5\n");
}
train_world(dtime/5, dw, step_size);
CTF_int::update_all_models(MPI_COMM_WORLD);

if (rank == 0){
if (rank == 0 && verbose == 1){
printf("Completed training round 5/5\n");
}
// double dtime for next iteration
Expand Down Expand Up @@ -409,8 +403,8 @@ char* getCmdOption(char ** begin,


int main(int argc, char ** argv){
int rank, np;
double time;
int rank, np, num_iterations, verbose;
double time, time_jump;
char * file_path;
int const in_num = argc;
char ** input_str = argv;
Expand All @@ -428,6 +422,21 @@ int main(int argc, char ** argv){
if (time < 0) time = 5.0;
} else time = 5.0;

if (getCmdOption(input_str, input_str+in_num, "-verbose")){
verbose = atoi(getCmdOption(input_str, input_str+in_num, "-verbose"));
if (verbose < 0 || verbose > 1) verbose = 0;
} else verbose = 0;

// number of iterations for training
if (getCmdOption(input_str, input_str+in_num, "-niter")){
num_iterations = atoi(getCmdOption(input_str, input_str+in_num, "-niter"));
} else num_iterations = 5;

// control how much dtime should be increased upon each iteration
// dtime = dtime * time_jump at the end of each iteration
if (getCmdOption(input_str, input_str+in_num, "-time_jump")){
time_jump = atof(getCmdOption(input_str, input_str+in_num, "-time_jump"));
} else time_jump = 1.5;

// Boolean flags that are used to pass command-line arguments to function train_all
bool write_coeff = false;
Expand Down Expand Up @@ -460,7 +469,7 @@ int main(int argc, char ** argv){
printf("Executing a wide set of contractions to train model with time budget of %lf sec\n", time);
if (write_coeff) printf("At the end of execution write new coefficients will be written to model file %s\n",file_path);
}
train_all(time, write_coeff, dump_data, coeff_file, data_dir_str);
train_all(time, write_coeff, dump_data, coeff_file, data_dir_str, num_iterations, time_jump, verbose);
}


Expand Down
3 changes: 1 addition & 2 deletions configure
Original file line number Diff line number Diff line change
Expand Up @@ -1357,8 +1357,7 @@ fi


cat > $BUILDDIR/setup.py <<EOF
from distutils.core import setup
from distutils.extension import Extension
from setuptools import setup, Extension
import numpy
import os
Expand Down
Loading

0 comments on commit 917bcb8

Please sign in to comment.