diff --git a/bestla/bestla/bestla_prologue_b.h b/bestla/bestla/bestla_prologue_b.h index 2655357bc..b543ab1cf 100644 --- a/bestla/bestla/bestla_prologue_b.h +++ b/bestla/bestla/bestla_prologue_b.h @@ -136,7 +136,6 @@ class WeightKBlockNInteger { } void setDoubleQuantCorrection(utils::avector* dq_buf, StorageWeight* ptr) { - // TODO(zhe): parallel data_cpy. if (ptr->SDtype() == BTLA_DTYPE::DQ8_BNB) { auto packw_dqbuf_ptr = ptr->DQPtr(); memcpy(packw_dqbuf_ptr, dq_buf->data(), dq_buf->size() * sizeof(float)); diff --git a/bestla/bestla/bestla_utils.h b/bestla/bestla/bestla_utils.h index 3cd439c14..891cdd80d 100644 --- a/bestla/bestla/bestla_utils.h +++ b/bestla/bestla/bestla_utils.h @@ -298,6 +298,8 @@ inline const char* bestla_dtype_str(BTLA_DTYPE dtype) { return "fp8_e5m2"; case BTLA_DTYPE::F8_E3M4: return "fp8_e3m4"; + case BTLA_DTYPE::F8_E8M0: + return "fp8_e8m0"; case BTLA_DTYPE::S8: return "signed_int8"; case BTLA_DTYPE::U8: diff --git a/bestla/bestla/ut/bestla_ut.h b/bestla/bestla/ut/bestla_ut.h index a782ff609..9a7e3eefd 100644 --- a/bestla/bestla/ut/bestla_ut.h +++ b/bestla/bestla/ut/bestla_ut.h @@ -40,7 +40,7 @@ static int8_t cache[CacheSize]; #define FP16_ERR 0.001f #define BF16_ERR 0.02f #define INT8_ERR 0.2f -#define F8_ERR 1.4f +#define F8_ERR 1.5f #define INT4_ERR 3.f #define FP4_ERR 3.f