Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CORE: fix coll trace for service team #1046

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 0 additions & 3 deletions src/components/tl/mlx5/tl_mlx5.h
Original file line number Diff line number Diff line change
Expand Up @@ -180,9 +180,6 @@ typedef struct ucc_tl_mlx5_rcache_region {
#define UCC_TL_CTX_LIB(_ctx) \
(ucc_derived_of((_ctx)->super.super.lib, ucc_tl_mlx5_lib_t))

-#define IS_SERVICE_TEAM(_team) \
-((_team)->super.super.params.scope == UCC_CL_LAST + 1)
-
#define SQUARED(_num) ((_num) * (_num))

ucc_status_t tl_mlx5_create_rcache(ucc_tl_mlx5_context_t *ctx);
Expand Down
2 changes: 1 addition & 1 deletion src/components/tl/mlx5/tl_mlx5_context.c
Original file line number Diff line number Diff line change
Expand Up @@ -251,7 +251,7 @@ ucc_status_t ucc_tl_mlx5_context_ib_ctx_pd_setup(ucc_base_context_t *context)
while (UCC_INPROGRESS == (status = ucc_collective_test(&req->super))) {
ucc_context_progress(core_ctx);
}
-ucc_collective_finalize(&req->super);
+ucc_collective_finalize_internal(req);

if (UCC_OK != status) {
tl_debug(context->lib, "failure during mlx5 ctx bcast");
Expand Down
7 changes: 4 additions & 3 deletions src/components/tl/sharp/tl_sharp_context.c
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include <inttypes.h>
#include "tl_sharp.h"
#include "utils/arch/cpu.h"
+#include "core/ucc_service_coll.h"

static int ucc_tl_sharp_oob_barrier(void *arg)
{
Expand Down Expand Up @@ -141,7 +142,7 @@ static int ucc_tl_sharp_service_barrier(void *arg)
ucc_context_progress(ctx->super.super.ucc_context);
status = ucc_collective_test(&req->super);
} while (status == UCC_INPROGRESS);
-ucc_collective_finalize(&req->super);
+ucc_collective_finalize_internal(req);

return status;
}
Expand Down Expand Up @@ -179,7 +180,7 @@ static int ucc_tl_sharp_service_gather(void *arg, int root, void *sbuf,
ucc_context_progress(ctx->super.super.ucc_context);
status = ucc_collective_test(&req->super);
} while (status == UCC_INPROGRESS);
-ucc_collective_finalize(&req->super);
+ucc_collective_finalize_internal(req);

if (subset.myrank != root) {
ucc_free(rbuf);
Expand Down Expand Up @@ -208,7 +209,7 @@ static int ucc_tl_sharp_service_bcast(void *arg, void *buf, int size, int root)
status = ucc_collective_test(&req->super);
} while (status == UCC_INPROGRESS);

-ucc_collective_finalize(&req->super);
+ucc_collective_finalize_internal(req);
return status;
}

Expand Down
6 changes: 5 additions & 1 deletion src/components/tl/ucc_tl.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
-* Copyright (c) 2020, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+* Copyright (c) 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* See file LICENSE for terms.
*/
Expand Down Expand Up @@ -165,4 +165,8 @@ typedef struct ucc_tl_lib_attr {
#define UCC_TL_TEAM_MAP(_tl_team) (_tl_team)->super.super.params.map

#define UCC_TL_TEAM_OOB(_tl_team) (_tl_team)->super.super.params.params.oob
+
+#define UCC_TL_IS_SERVICE_TEAM(_tl_team) \
+((_tl_team)->super.super.params.scope == UCC_CL_LAST + 1)
+
#endif
5 changes: 1 addition & 4 deletions src/components/tl/ucp/tl_ucp.h
Original file line number Diff line number Diff line change
Expand Up @@ -175,11 +175,8 @@ extern ucc_config_field_t ucc_tl_ucp_lib_config_table[];
#define UCC_TL_UCP_TEAM_CTX(_team) \
(ucc_derived_of((_team)->super.super.context, ucc_tl_ucp_context_t))

-#define IS_SERVICE_TEAM(_team) \
-((_team)->super.super.params.scope == UCC_CL_LAST + 1)
-
#define USE_SERVICE_WORKER(_team) \
-(IS_SERVICE_TEAM(_team) && UCC_TL_UCP_TEAM_CTX(_team)->cfg.service_worker)
+(UCC_TL_IS_SERVICE_TEAM(_team) && UCC_TL_UCP_TEAM_CTX(_team)->cfg.service_worker)

#define UCC_TL_UCP_TASK_TEAM(_task) \
(ucc_derived_of((_task)->super.team, ucc_tl_ucp_team_t))
Expand Down
2 changes: 1 addition & 1 deletion src/components/tl/ucp/tl_ucp_ep.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
-* Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+* Copyright (c) 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* See file LICENSE for terms.
*/
Expand Down
2 changes: 1 addition & 1 deletion src/components/tl/ucp/tl_ucp_ep.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ static inline ucc_status_t ucc_tl_ucp_get_ep(ucc_tl_ucp_team_t *team,
ucc_team_t *core_team = UCC_TL_CORE_TEAM(team);
/* Core super.super.team ptr is NULL for service_team
which has scope == UCC_CL_LAST + 1*/
-ucc_assert((NULL != core_team) || IS_SERVICE_TEAM(team));
+ucc_assert((NULL != core_team) || UCC_TL_IS_SERVICE_TEAM(team));
ctx_rank = core_team ? ucc_get_ctx_rank(core_team, core_rank)
: core_rank;
*ep = team->worker->eps[ctx_rank];
Expand Down
5 changes: 3 additions & 2 deletions src/components/tl/ucp/tl_ucp_team.c
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ UCC_CLASS_INIT_FUNC(ucc_tl_ucp_team_t, ucc_base_context_t *tl_context,
}
}

-if (ucc_global_config.file_cfg && !IS_SERVICE_TEAM(self) &&
+if (ucc_global_config.file_cfg && !UCC_TL_IS_SERVICE_TEAM(self) &&
ctx->topo_required && tl_context->lib->use_tuning) {
status = ucc_add_team_sections(&self->cfg, ucc_tl_ucp_lib_config_table,
self->topo, &self->tuning_str,
Expand All @@ -91,7 +91,8 @@ UCC_CLASS_INIT_FUNC(ucc_tl_ucp_team_t, ucc_base_context_t *tl_context,
self->cfg.use_reordering = 0;
}

-if (self->topo && !IS_SERVICE_TEAM(self) && self->topo->topo->sock_bound) {
+if (self->topo && !UCC_TL_IS_SERVICE_TEAM(self) &&
+self->topo->topo->sock_bound) {
tsize = UCC_TL_TEAM_SIZE(self);
max_radix = (ucc_topo_max_ppn(self->topo) == 1) ? tsize :
ucc_min(tsize, ucc_topo_min_socket_size(self->topo));
Expand Down
4 changes: 1 addition & 3 deletions src/core/ucc_service_coll.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
-* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+* Copyright (c) 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* See file LICENSE for terms.
*/
Expand Down Expand Up @@ -139,8 +139,6 @@ ucc_status_t ucc_service_coll_test(ucc_service_coll_req_t *req)
return status;
}

-ucc_status_t ucc_collective_finalize_internal(ucc_coll_task_t *task);
-
ucc_status_t ucc_service_coll_finalize(ucc_service_coll_req_t *req)
{
ucc_status_t status;
Expand Down
5 changes: 4 additions & 1 deletion src/core/ucc_service_coll.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
-* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+* Copyright (c) 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* See file LICENSE for terms.
*/

Expand Down Expand Up @@ -37,4 +37,7 @@ ucc_status_t ucc_internal_oob_init(ucc_team_t *team, ucc_subset_t subset,
ucc_team_oob_coll_t *oob);

void ucc_internal_oob_finalize(ucc_team_oob_coll_t *oob);
+
+ucc_status_t ucc_collective_finalize_internal(ucc_coll_task_t *task);
+
#endif
Loading