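convert_int4_to_float<BLOCK_N> Function Template — PyTorch Architecture
Architecture documentation for the convert_int4_to_float function template (parameterized by the integer template parameter BLOCK_N) in int4mm_kernel.cpp from the PyTorch codebase.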
Entity Profile
Source Code
aten/src/ATen/native/cpu/int4mm_kernel.cpp lines 481–521
// Decodes the n-th 4-bit code packed in `b` and returns it as a float.
//
// Each byte of `b` carries two 4-bit codes. A code c in [0, 15] is mapped
// through a lookup table to the value c - 8, i.e. -8.0f .. 7.0f.
//
// Which nibble holds element n depends on the build configuration:
//   * CPU_CAPABILITY_AVX512 (non-MSVC) with BLOCK_N == 64, or
//     CPU_CAPABILITY_AVX2 (non-MSVC) with BLOCK_N == 32:
//     elements are grouped into blocks of BLOCK_N values occupying
//     BLOCK_N / 2 bytes. Within a block, the first BLOCK_N / 2 elements are
//     the low nibbles of those bytes and the remaining BLOCK_N / 2 elements
//     are the high nibbles of the same bytes.
//   * Every other case: element n lives in byte n / 2, even n in the low
//     nibble and odd n in the high nibble.
//
// No bounds checking is performed on `b`.
template<int BLOCK_N>
inline float convert_int4_to_float(const uint8_t* b, int n) {
  static constexpr float lut[16] = {
      -8.0f, -7.0f, -6.0f, -5.0f, -4.0f, -3.0f, -2.0f, -1.0f,
      0.0f,  1.0f,  2.0f,  3.0f,  4.0f,  5.0f,  6.0f,  7.0f};

  // Block width that uses the blocked layout in this build; 0 means the
  // blocked layout is never used.
#if defined(CPU_CAPABILITY_AVX512) && !defined(_MSC_VER)
  constexpr int kBlockedN = 64;
#elif defined(CPU_CAPABILITY_AVX2) && !defined(_MSC_VER)
  constexpr int kBlockedN = 32;
#else
  constexpr int kBlockedN = 0;
#endif

  if constexpr (kBlockedN != 0 && BLOCK_N == kBlockedN) {
    constexpr int kHalf = BLOCK_N / 2;
    const int block = n / BLOCK_N;
    const int within = n - block * BLOCK_N;
    const int first_byte = block * BLOCK_N / 2;
    if (within < kHalf) {
      // First half of the block: low nibble of byte `within`.
      return lut[b[first_byte + within] & 0x0f];
    }
    // Second half of the block: high nibble of byte `within - kHalf`.
    return lut[b[first_byte + (within - kHalf)] >> 4];
  } else {
    // Plain interleaved layout: two consecutive elements per byte.
    const uint8_t packed = b[n / 2];
    const int code = (n & 1) ? (packed >> 4) : (packed & 0x0F);
    return lut[code];
  }
}
Source
Analyze Your Own Codebase
Get architecture documentation, dependency graphs, and domain analysis for your codebase in minutes.
Try Supermodel Free