Skip to content

Commit 690f97c

Browse files
committed
Merge pull request ROCm#147 from nileshnegi/sync/nccl-tests_v2.16.7
[SYNC] NCCL-Tests v2.16.7
2 parents a7809b3 + 6f1b11a commit 690f97c

File tree

2 files changed

+17
-4
lines changed

2 files changed

+17
-4
lines changed

‎src/Makefile‎

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,12 @@ HIPCUFLAGS := -std=c++14
2222
LDFLAGS :=
2323
HIPLDFLAGS :=
2424

25-
MPI ?= 0 # Set to 1 to enable MPI support (multi-process/multi-node)
26-
NAME_SUFFIX ?= # e.g. _mpi when using MPI=1
27-
DSO ?= 0 # Set to 1 to create and use libverifiable.so to reduce binary size
25+
# Set to 1 to enable MPI support (multi-process/multi-node)
26+
MPI ?= 0
27+
# Set to e.g. _mpi when using MPI=1
28+
NAME_SUFFIX ?=
29+
# Set to 1 to create and use libverifiable.so to reduce binary size
30+
DSO ?= 0
2831

2932
HIP_VERSION = $(strip $(shell which $(HIPCONFIG) >/dev/null && $(HIPCONFIG) --version))
3033
HIP_MAJOR = $(shell echo $(HIP_VERSION) | cut -d "." -f 1)

‎src/common.cu‎

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,12 +21,16 @@
2121
#include "cuda.h"
2222
#include <vector>
2323
#include <utility>
24+
#include <errno.h> /* program_invocation_short_name */
2425

2526
//#define DEBUG_PRINT
2627

2728
#include "verifiable.h"
2829
#include "git_version.h"
2930

31+
#define DIVUP(x, y) \
32+
(((x)+(y)-1)/(y))
33+
3034
int test_ncclVersion = 0; // init'd with ncclGetVersion()
3135
int32_t gpu_block3;
3236
size_t cache_bytes = 192 * 1024 * 1024; // Use 192MB
@@ -1446,6 +1450,7 @@ testResult_t run() {
14461450
#endif
14471451
is_main_thread = is_main_proc = (proc == 0) ? 1 : 0;
14481452

1453+
PRINT("# Collective test starting: %s\n", program_invocation_short_name);
14491454
PRINT("# nThread %d nGpus %d minBytes %ld maxBytes %ld step: %ld(%s) warmup iters: %d iters: %d agg iters: %d validation: %d graph: %d\n",
14501455
nThreads, nGpus, minBytes, maxBytes,
14511456
(stepFactor > 1)?stepFactor:stepBytes, (stepFactor > 1)?"factor":"bytes",
@@ -1488,10 +1493,14 @@ testResult_t run() {
14881493
PRINT("%s", line);
14891494
#endif
14901495

1496+
// Reserve 1GiB of memory for each 16GiB installed, but limit to a max of 4GiB
1497+
const size_t GB = (1ULL << 30);
1498+
size_t reserveMem = std::min(DIVUP(maxMem, 16*GB) * 1*GB, 4*GB);
14911499
// We need sendbuff, recvbuff, expected (when datacheck enabled), plus 1G for the rest, on top of reserveMem.
1492-
size_t memMaxBytes = (maxMem - (1<<30)) / (datacheck ? 3 : 2);
1500+
size_t memMaxBytes = (maxMem - reserveMem - 1*GB) / (datacheck ? 3 : 2);
14931501
if (maxBytes > memMaxBytes) {
14941502
maxBytes = memMaxBytes;
1503+
if (minBytes > maxBytes) minBytes = maxBytes;
14951504
if (proc == 0) printf("#\n# Reducing maxBytes to %ld due to memory limitation\n", maxBytes);
14961505
}
14971506

@@ -1723,6 +1732,7 @@ testResult_t run() {
17231732
PRINT("# Out of bounds values : %d %s\n", errors[0], errors[0] ? "FAILED" : "OK");
17241733
PRINT("# Avg bus bandwidth : %g %s\n", bw[0], check_avg_bw == -1 ? "" : (bw[0] < check_avg_bw*(0.9) ? "FAILED" : "OK"));
17251734
PRINT("#\n");
1735+
PRINT("# Collective test concluded: %s\n", program_invocation_short_name);
17261736
#ifdef MPI_SUPPORT
17271737
MPI_Comm_free(&mpi_comm);
17281738
MPI_Finalize();

0 commit comments

Comments
 (0)