Skip to content

Commit b3f0716

Browse files
authored
Merge pull request ROCm#27 from edgargabriel/topic/half_prod_fix
Fix the algorithm that assigns values in the test suite
2 parents 9d3a53d + e9f5be1 commit b3f0716

File tree

1 file changed

+2
-5
lines changed

1 file changed

+2
-5
lines changed

‎verifiable/verifiable.cu‎

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,6 @@
1414

1515
#include "rccl/rccl.h"
1616

17-
18-
#define RCCL_BFLOAT 1
19-
2017
#if NCCL_VERSION_CODE >= NCCL_VERSION(2,10,0) && RCCL_BFLOAT16 ==1
2118
#define HAVE_ncclBfloat16 1
2219
#else
@@ -124,7 +121,7 @@ namespace {
124121
return Y(x);
125122
}
126123
template<>
127-
__host__ __device__ half castTo<__half>(float x) {
124+
__host__ __device__ __half castTo<__half>(float x) {
128125
return __float2half(x);
129126
}
130127
#if RCCL_BFLOAT16 == 1
@@ -425,7 +422,7 @@ __host__ __device__ void genSumXY(
425422
// Let s be the number of ranks per partition. This is either rn/pn as we
426423
// intended, or y/p_sum if that's smaller to prevent overshooting our target y.
427424
uint32_t s = y/p_sum < rn/pn ? y/p_sum : rn/pn;
428-
x = r/s < pn ? 1 + r/s : 0; // First s*pn ranks contribute partition index +1.
425+
x = (s != 0 && r/s < pn) ? 1 + r/s : 0; // First s*pn ranks contribute partition index +1.
429426
x += r == rn-1 ? y - s*p_sum : 0; // Last rank contributes discrepancy.
430427
}
431428
}

0 commit comments

Comments
 (0)