File tree Expand file tree Collapse file tree 1 file changed +2 -5 lines changed
Original file line number Diff line number Diff line change 1414
1515#include "rccl/rccl.h"
1616
17-
18- #define RCCL_BFLOAT 1
19-
2017#if NCCL_VERSION_CODE >= NCCL_VERSION(2,10,0) && RCCL_BFLOAT16 ==1
2118 #define HAVE_ncclBfloat16 1
2219#else
@@ -124,7 +121,7 @@ namespace {
124121 return Y (x);
125122 }
126123 template <>
127- __host__ __device__ half castTo<__half>(float x) {
124+ __host__ __device__ __half castTo<__half>(float x) {
128125 return __float2half (x);
129126 }
130127 #if RCCL_BFLOAT16 == 1
@@ -425,7 +422,7 @@ __host__ __device__ void genSumXY(
425422 // Let s be the number of ranks per partition. This is either rn/pn as we
426423 // intended, or y/p_sum if that's smaller to prevent overshooting our target y.
427424 uint32_t s = y/p_sum < rn/pn ? y/p_sum : rn/pn;
428- x = r/s < pn ? 1 + r/s : 0 ; // First s*pn ranks contribute partition index +1.
425+ x = (s != 0 && r/s < pn) ? 1 + r/s : 0 ; // First s*pn ranks contribute partition index +1.
429426 x += r == rn-1 ? y - s*p_sum : 0 ; // Last rank contributes discrepancy.
430427}
431428}
You can’t perform that action at this time.
0 commit comments