File tree Expand file tree Collapse file tree 1 file changed +8
-1
lines changed Expand file tree Collapse file tree 1 file changed +8
-1
lines changed Original file line number Diff line number Diff line change 1616
1717#include " ../verifiable/verifiable.h"
1818
// Integer ceiling division: yields the smallest q with q*(y) >= (x) when x is
// non-negative and y is positive.
// This must stay a function-like macro, so no whitespace may appear between
// the name and the opening parenthesis; otherwise the preprocessor defines an
// object-like macro whose replacement text starts with "(x, y)".
// Caveats: y is evaluated twice (avoid arguments with side effects), and
// (x)+(y)-1 can wrap when the operands are close to the top of their type's range.
#define DIVUP(x, y) \
    (((x)+(y)-1)/(y))
21+
1922int test_ncclVersion = 0 ; // init'd with ncclGetVersion()
2023
2124#if NCCL_MAJOR >= 2
@@ -1047,10 +1050,14 @@ testResult_t run() {
10471050 PRINT (" %s" , line);
10481051#endif
10491052
1053+ // Reserve 1GiB of memory for each 16GiB installed, but limit to a max of 4GiB
1054+ const size_t GB = (1ULL << 30 );
1055+ size_t reserveMem = std::min (DIVUP (maxMem, 16 *GB) * 1 *GB, 4 *GB);
10501056 // We need sendbuff, recvbuff, expected (when datacheck enabled), plus 1G for the rest.
1051- size_t memMaxBytes = (maxMem - ( 1 << 30 ) ) / (datacheck ? 3 : 2 );
1057+ size_t memMaxBytes = (maxMem - reserveMem - 1 *GB ) / (datacheck ? 3 : 2 );
10521058 if (maxBytes > memMaxBytes) {
10531059 maxBytes = memMaxBytes;
1060+ if (minBytes > maxBytes) minBytes = maxBytes;
10541061 if (proc == 0 ) printf (" #\n # Reducing maxBytes to %ld due to memory limitation\n " , maxBytes);
10551062 }
10561063
You can’t perform that action at this time.
0 commit comments