Skip to content

Commit

Permalink
Merge pull request #310 from edoapra/too-many-open-files
Browse files Browse the repository at this point in the history
MPI-PR: check for number of open files
  • Loading branch information
edoapra authored Jul 13, 2023
2 parents 3cb97b6 + 06c48eb commit ab9af9c
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 5 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/github_actions.yml
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ jobs:
armci_network: mpi-pr
f77: ifort
cc: icc
config_opts: LIBS=-lifcore
oneapi: /Users/runner/apps/oneapi
- os: ubuntu-20.04
experimental: true
Expand Down Expand Up @@ -116,6 +117,7 @@ jobs:
armci_network: mpi-ts
f77: ifort
cc: gcc
oneapi: /opt/intel/oneapi
exclude:
- armci_network: mpi-pr
mpi_impl: openmpi
Expand Down
29 changes: 24 additions & 5 deletions comex/src-mpi-pr/comex.c
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,9 @@ static char *static_server_buffer = NULL;
static int static_server_buffer_size = 0;
static int eager_threshold = -1;
static int max_message_size = -1;
#if ENABLE_SYSV
static int use_dev_shm = 1;
#endif
static int token_counter = 0;
static int init_from_comm = 0;

Expand Down Expand Up @@ -375,6 +377,7 @@ STATIC void check_devshm(int fd, size_t size);
static int devshm_initialized = 0;
static long devshm_fs_left = 0;
static long devshm_fs_initial = 0;
STATIC void count_open_fds(void);

int _comex_init(MPI_Comm comm)
{
Expand Down Expand Up @@ -7561,13 +7564,9 @@ STATIC void check_devshm(int fd, size_t size){
g_state.rank, g_state.node_size, devshm_fs_initial/CONVERT_TO_M, (long) ufs_statfs.f_bsize, (long) g_state.node_size);
#endif
}
// if (size > 0) {
count_open_fds();
newspace = (long) ( size*(g_state.node_size -1));
// }else{
// newspace = (long) ( size);
// }
if(newspace>0){
// no fd for space<0
fstatfs(fd, &ufs_statfs);
#ifdef DEBUGSHM
fprintf(stderr, "[%d] /dev/shm filesize %ld filesize*np %ld initial devshm space %ld current /dev/shm space %ld \n",
Expand All @@ -7594,3 +7593,23 @@ STATIC void check_devshm(int fd, size_t size){
#endif
#endif
}

/**
 * Abort the run when the number of open file handles reported by the
 * kernel gets close to the system limit.
 *
 * Reads three integers from /proc/sys/fs/file-nr (per the Linux sysctl
 * documentation: allocated handles, unused handles, maximum handles) and
 * calls comex_error() once the first value exceeds 60% of the third.
 *
 * The check is best effort: when the file cannot be opened, or does not
 * contain three integers, the function returns without doing anything
 * instead of dereferencing a NULL stream or comparing uninitialized values.
 */
STATIC void count_open_fds(void) {
    long nfiles = 0;
    long unused = 0;
    long maxfiles = 0;
    int nread;
    FILE *f = fopen("/proc/sys/fs/file-nr", "r");

    if (f == NULL) {
        /* file-nr is not available on this system: nothing to check */
        return;
    }

    nread = fscanf(f, "%ld %ld %ld", &nfiles, &unused, &maxfiles);
    /* close right away so the stream is released on every path, including
     * the error path below in case comex_error() does not return */
    fclose(f);

    if (nread != 3) {
        /* unexpected file format: the counters are not trustworthy */
        return;
    }

#ifdef DEBUGSHM
    if (nfiles % 1000 == 0) {
        fprintf(stderr," %d: no. open files = %ld maxfiles = %ld\n", g_state.rank, nfiles, maxfiles);
    }
#endif

    /* threshold is 60% of the maximum; divide first to avoid overflow */
    if (nfiles > (maxfiles/100)*60) {
        printf(" %d: running out of files; files = %ld maxfiles = %ld\n", g_state.rank, nfiles, maxfiles);
#if PAUSE_ON_ERROR
        fprintf(stderr,"%d(%d): too many open files\n",
                g_state.rank, getpid());
        pause();
#endif
        comex_error("count_open_fds: too many open files", -1);
    }
}

0 comments on commit ab9af9c

Please sign in to comment.