|
21 | 21 | #include "cuda.h" |
22 | 22 | #include <vector> |
23 | 23 | #include <utility> |
| 24 | +#include <errno.h> /* program_invocation_short_name */ |
24 | 25 |
|
25 | 26 | //#define DEBUG_PRINT |
26 | 27 |
|
27 | 28 | #include "verifiable.h" |
28 | 29 | #include "git_version.h" |
29 | 30 |
|
| 31 | +#define DIVUP(x, y) \ |
| 32 | + (((x)+(y)-1)/(y)) |
| 33 | + |
30 | 34 | int test_ncclVersion = 0; // init'd with ncclGetVersion() |
31 | 35 | int32_t gpu_block3; |
32 | 36 | size_t cache_bytes = 192 * 1024 * 1024; // Use 192MB |
@@ -1446,6 +1450,7 @@ testResult_t run() { |
1446 | 1450 | #endif |
1447 | 1451 | is_main_thread = is_main_proc = (proc == 0) ? 1 : 0; |
1448 | 1452 |
|
| 1453 | + PRINT("# Collective test starting: %s\n", program_invocation_short_name); |
1449 | 1454 | PRINT("# nThread %d nGpus %d minBytes %ld maxBytes %ld step: %ld(%s) warmup iters: %d iters: %d agg iters: %d validation: %d graph: %d\n", |
1450 | 1455 | nThreads, nGpus, minBytes, maxBytes, |
1451 | 1456 | (stepFactor > 1)?stepFactor:stepBytes, (stepFactor > 1)?"factor":"bytes", |
@@ -1488,10 +1493,14 @@ testResult_t run() { |
1488 | 1493 | PRINT("%s", line); |
1489 | 1494 | #endif |
1490 | 1495 |
|
| 1496 | + // Reserve 1GiB of memory for each 16GiB installed (rounded up), but limit to a max of 4GiB |
| 1497 | + const size_t GB = (1ULL << 30); |
| 1498 | + size_t reserveMem = std::min(DIVUP(maxMem, 16*GB) * 1*GB, 4*GB); |
1491 | 1499 | // We need sendbuff, recvbuff, expected (when datacheck enabled), plus 1GiB for the rest. |
1492 | | - size_t memMaxBytes = (maxMem - (1<<30)) / (datacheck ? 3 : 2); |
| 1500 | + size_t memMaxBytes = (maxMem - reserveMem - 1*GB) / (datacheck ? 3 : 2); |
1493 | 1501 | if (maxBytes > memMaxBytes) { |
1494 | 1502 | maxBytes = memMaxBytes; |
| 1503 | + if (minBytes > maxBytes) minBytes = maxBytes; |
1495 | 1504 | if (proc == 0) printf("#\n# Reducing maxBytes to %ld due to memory limitation\n", maxBytes); |
1496 | 1505 | } |
1497 | 1506 |
|
@@ -1723,6 +1732,7 @@ testResult_t run() { |
1723 | 1732 | PRINT("# Out of bounds values : %d %s\n", errors[0], errors[0] ? "FAILED" : "OK"); |
1724 | 1733 | PRINT("# Avg bus bandwidth : %g %s\n", bw[0], check_avg_bw == -1 ? "" : (bw[0] < check_avg_bw*(0.9) ? "FAILED" : "OK")); |
1725 | 1734 | PRINT("#\n"); |
| 1735 | + PRINT("# Collective test concluded: %s\n", program_invocation_short_name); |
1726 | 1736 | #ifdef MPI_SUPPORT |
1727 | 1737 | MPI_Comm_free(&mpi_comm); |
1728 | 1738 | MPI_Finalize(); |
|
0 commit comments