Skip to content
This repository has been archived by the owner on Dec 2, 2021. It is now read-only.

Commit

Permalink
enable psm2 nameserver
Browse files Browse the repository at this point in the history
Let psm2 use the ip:port address format and use the name server to resolve
it to the native psm2 address. This patch is based on the cart commit used
by daos master as of 2019-12-11.

note:
1) orterun needs to use this flag:
	--mca mtl ^psm2,ofi
2) The server side needs to pass this shell variable under orterun:
	-x FI_PSM2_NAME_SERVER=1
   This variable tells psm2 to start the name server.
3) Both the server side and the client side need the OFI_PORT variable:
	-x OFI_PORT=xxx
   The client side is being changed to pick a port automatically.

Signed-off-by: Yulu Jia <[email protected]>
  • Loading branch information
yulujia committed Dec 12, 2019
1 parent 4d03620 commit 48f7d46
Show file tree
Hide file tree
Showing 9 changed files with 42 additions and 13 deletions.
2 changes: 1 addition & 1 deletion SConstruct
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ def scons():
# Compiler options
env.Append(CCFLAGS=['-g3', '-Wshadow', '-Wall', '-Werror', '-fpic',
'-D_GNU_SOURCE'])
env.Append(CCFLAGS=['-O2', '-pthread'])
env.Append(CCFLAGS=['-Og', '-pthread'])
env.Append(CFLAGS=['-std=gnu99'])
if not GetOption('clean'):
env.AppendIfSupported(CCFLAGS=DESIRED_FLAGS)
Expand Down
4 changes: 3 additions & 1 deletion src/cart/crt_hg.c
Original file line number Diff line number Diff line change
Expand Up @@ -453,11 +453,13 @@ crt_get_info_string(char **string)
} else {
/* OFI_PORT is only for context 0 to use */
port = crt_na_ofi_conf.noc_port;
crt_na_ofi_conf.noc_port = -1;
crt_na_ofi_conf.noc_port++;

D_ASPRINTF(*string, "%s://%s/%s:%d", plugin_str,
crt_na_ofi_conf.noc_domain,
crt_na_ofi_conf.noc_ip_str, port);
// D_ASPRINTF(*string, "%s://%s:%d", plugin_str,
// crt_na_ofi_conf.noc_ip_str, port);
}

if (*string == NULL)
Expand Down
2 changes: 1 addition & 1 deletion src/cart/crt_init.c
Original file line number Diff line number Diff line change
Expand Up @@ -676,7 +676,7 @@ int crt_na_ofi_config_init(void)

port = -1;
port_str = getenv("OFI_PORT");
if (crt_is_service() && port_str != NULL && strlen(port_str) > 0) {
if (port_str != NULL && strlen(port_str) > 0) {
if (!is_integer_str(port_str)) {
D_DEBUG(DB_ALL, "ignore invalid OFI_PORT %s.",
port_str);
Expand Down
3 changes: 3 additions & 0 deletions src/crt_launch/crt_launch.c
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ struct host {
};

static int my_rank;
volatile static int myflag = 0;

struct options_t {
int is_client;
Expand Down Expand Up @@ -153,6 +154,8 @@ get_self_uri(struct host *h)
char *p;
int len;
int rc;
while (myflag)
sched_yield();

rc = crt_init(0, CRT_FLAG_BIT_SERVER | CRT_FLAG_BIT_PMIX_DISABLE |
CRT_FLAG_BIT_LM_DISABLE);
Expand Down
8 changes: 4 additions & 4 deletions src/test/SConscript
Original file line number Diff line number Diff line change
Expand Up @@ -78,10 +78,10 @@ def scons():
tenv.Requires(target, [cart_lib, gurt_lib])
tenv.Install(os.path.join("$PREFIX", 'TESTING', 'tests'), target)

for test in ECHO_TEST_SRC:
target = tenv.Program(test)
tenv.Requires(target, [cart_lib, gurt_lib])
tenv.Install(os.path.join("$PREFIX", 'TESTING', 'tests'), target)
# for test in ECHO_TEST_SRC:
# target = tenv.Program(test)
# tenv.Requires(target, [cart_lib, gurt_lib])
# tenv.Install(os.path.join("$PREFIX", 'TESTING', 'tests'), target)

for test in IV_TESTS:
target = tenv.Program(test)
Expand Down
2 changes: 1 addition & 1 deletion src/test/test_hlc_net.c
Original file line number Diff line number Diff line change
Expand Up @@ -280,7 +280,7 @@ static int srv_init(void)

int main(int argc, char *argv[])
{
int i, rc;
int i, rc = 0;

dbg("---%s--->", __func__);

Expand Down
6 changes: 6 additions & 0 deletions src/test/tests_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -371,6 +371,9 @@ tc_cli_start_basic(char *local_group_name, char *srv_group_name,
uint32_t grp_size;
int attach_retries = opts.num_attach_retries;
int rc = 0;
int myflag = 0;
while (myflag)
sched_yield();

D_ASSERTF(opts.is_initialized == true, "tc_test_init not called.\n");

Expand Down Expand Up @@ -452,6 +455,7 @@ tc_srv_start_basic(char *srv_group_name, crt_context_t *crt_ctx,
char *my_uri;
d_rank_t my_rank;
int rc = 0;
int myflag = 0;

D_ASSERTF(opts.is_initialized == true, "tc_test_init not called.\n");

Expand All @@ -461,6 +465,8 @@ tc_srv_start_basic(char *srv_group_name, crt_context_t *crt_ctx,
rc = d_log_init();
D_ASSERT(rc == 0);

while (myflag)
sched_yield();
if (init_opt) {
rc = crt_init_opt(srv_group_name, CRT_FLAG_BIT_SERVER |
CRT_FLAG_BIT_PMIX_DISABLE |
Expand Down
11 changes: 7 additions & 4 deletions test/rpc/cart_rpc_two_node.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,11 @@ defaultENV:
#!filter-only : /run/env_CRT_CTX_SHARE_ADDR/sep
#!filter-only : /run/tests/rpc_error
D_LOG_MASK: "DEBUG,MEM=ERR"
CRT_PHY_ADDR_STR: "ofi+sockets"
OFI_INTERFACE: "eth0"
D_LOG_FILE_APPEND_PID: "1"
CRT_PHY_ADDR_STR: "ofi+psm2"
OFI_INTERFACE: "ib0"
OFI_PORT: "22222"
FI_PSM2_NAME_SERVER: "1"
srv_CRT_CTX_NUM: "16"
cli_CRT_CTX_NUM: "16"
env_CRT_CTX_SHARE_ADDR: !mux
Expand Down Expand Up @@ -49,11 +52,11 @@ tests: !mux
name: test_group_basic
srv_bin: ../bin/crt_launch
srv_arg: "-e tests/test_group_np_srv --name tg_srv_grp --cfg_path=."
srv_env: "-x D_FI_CONFIG=../etc/fault-inject-cart.yaml"
srv_env: "-x FI_PSM2_NAME_SERVER=1 -x D_LOG_FILE_APPEND_PID=1 -x OFI_PORT=44444 -x D_FI_CONFIG=../etc/fault-inject-cart.yaml"
srv_ppn: "1"
cli_bin: tests/test_group_np_cli
cli_arg: "--name client_group --attach_to tg_srv_grp --cfg_path=."
cli_env: "-x D_FI_CONFIG=../etc/fault-inject-cart.yaml"
cli_env: "-x D_LOG_FILE_APPEND_PID=1 -x OFI_PORT=33333 -x D_FI_CONFIG=../etc/fault-inject-cart.yaml"
cli_ppn: "1"
ep_credits_1:
name: ep_credits_1
Expand Down
17 changes: 16 additions & 1 deletion test/util/cart_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,9 +139,15 @@ def get_env(self, cartobj):
log_file = os.path.join(log_path, "output.log")

log_mask = cartobj.params.get("D_LOG_MASK", "/run/defaultENV/")
log_append_pid = cartobj.params.get("D_LOG_FILE_APPEND_PID",
"/run/defaultENV/")
crt_phy_addr = cartobj.params.get("CRT_PHY_ADDR_STR",
"/run/defaultENV/")
ofi_interface = cartobj.params.get("OFI_INTERFACE", "/run/defaultENV/")
ofi_port = cartobj.params.get("OFI_PORT", "/run/defaultENV/")
fi_psm2_name_server = cartobj.params.get("FI_PSM2_NAME_SERVER",
"/run/defaultENV/")

ofi_share_addr = cartobj.params.get("CRT_CTX_SHARE_ADDR",
"/run/env_CRT_CTX_SHARE_ADDR/*/")

Expand All @@ -152,12 +158,21 @@ def get_env(self, cartobj):
if log_mask is not None:
env += " -x D_LOG_MASK={!s}".format(log_mask)

if log_append_pid:
env += " -x D_LOG_FILE_APPEND_PID={!s}".format(log_append_pid)

if crt_phy_addr is not None:
env += " -x CRT_PHY_ADDR_STR={!s}".format(crt_phy_addr)

if ofi_interface is not None:
env += " -x OFI_INTERFACE={!s}".format(ofi_interface)

if ofi_port:
env += " -x OFI_PORT={!s}".format(ofi_port)

if fi_psm2_name_server:
env += " -x FI_PSM2_NAME_SERVER={!s}".format(fi_psm2_name_server)

if ofi_share_addr is not None:
env += " -x CRT_CTX_SHARE_ADDR={!s}".format(ofi_share_addr)

Expand Down Expand Up @@ -225,7 +240,7 @@ def build_cmd(self, cartobj, env, host, report_uri=True, urifile=None):
else:
hostfile = self.write_host_file(tst_host,tst_ppn)

tst_cmd = "{} --mca btl self,tcp -N {} --hostfile {} "\
tst_cmd = "{} --mca mtl ^psm2,ofi -N {} --hostfile {} "\
.format(orterun_bin, tst_ppn, hostfile)

if urifile is not None:
Expand Down

0 comments on commit 48f7d46

Please sign in to comment.