diff --git a/src/core/quantizer/record_quantizer.h b/src/core/quantizer/record_quantizer.h index b1095a2ae..e107eebdd 100644 --- a/src/core/quantizer/record_quantizer.h +++ b/src/core/quantizer/record_quantizer.h @@ -44,11 +44,10 @@ class RecordQuantizer { scale = 254 / std::max(max - min, epsilon); bias = -min * scale - 127; for (size_t i = 0; i < dim; ++i) { - float v = vec[i] * scale + bias; + float v = std::round(vec[i] * scale + bias); squared_sum += v * v; sum += v; - (reinterpret_cast(out))[i] = - static_cast(std::round(v)); + (reinterpret_cast(out))[i] = static_cast(v); int8_sum += (reinterpret_cast(out))[i]; } extras = reinterpret_cast(static_cast(out) + dim);