compressed_rows Template Parameter — PyTorch Architecture
Architecture documentation for the `compressed_rows` boolean template parameter of `_compressed_to_block_compressed_cpu_kernel` in TensorConversions.cpp from the PyTorch codebase.
Entity Profile
Source Code
aten/src/ATen/native/TensorConversions.cpp lines 1982–2073
template <class index_t, class scalar_t, bool compressed_rows>
static void _compressed_to_block_compressed_cpu_kernel(
    const index_t n_compressed, // Tensor size along compressed dimension
    const index_t n_plain, // Tensor size along plain dimension
    const index_t C, // Block size along compressed dimension
    const index_t P, // Block size along plain dimension
    const index_t D, // Number of elements in dense dimensions
    const index_t* input_compressed_indices,
    const index_t* input_plain_indices,
    const scalar_t* input_values,
    index_t* result_compressed_indices,
    index_t* result_plain_indices,
    scalar_t* result_values) {
  // Converts a compressed sparse (CSR/CSC-like) representation into a
  // block compressed (BSR/BSC-like) one. A block is allocated iff at
  // least one nonzero falls inside it.
  //
  // Precondition: `result_values` must be zero-initialized by the
  // caller — only the positions of input nonzeros are written below;
  // the remaining entries of each allocated block are left untouched.
  assert(n_compressed % C == 0);
  assert(n_plain % P == 0);
  // Number of blocks along the compressed dim
  const index_t n_bcompressed = n_compressed / C;
  // Number of blocks along the plain dim
  const index_t n_bplain = n_plain / P;
  // Number of elements per block
  const index_t CPD = C * P * D;
  // For each plain-dim block index, the start of that block's storage
  // inside result_values. Every entry read in the scatter loop below is
  // (re)assigned earlier in the same block-row iteration, so stale
  // pointers from previous block rows are never dereferenced — this is
  // why the per-row reset loop present in the scipy implementation is
  // unnecessary here.
  std::vector<scalar_t*> blocks(n_bplain + 1, nullptr);
  // Scratch flags: which plain-dim blocks hold a nonzero in the current
  // block row. Each flag is cleared again as the block is allocated.
  std::vector<char> occupied(n_bplain, 0);
  // Number of blocks allocated so far
  index_t n_blks = 0;
  result_compressed_indices[0] = 0;
  // Iterate over block rows (blocks along the compressed dim)
  for (index_t block_c = 0; block_c < n_bcompressed; block_c++) {
    // Single pass over the block row's nonzeros to mark the occupied
    // plain-dim blocks...
    for (index_t i = input_compressed_indices[C * block_c];
         i < input_compressed_indices[C * (block_c + 1)];
         i++) {
      occupied[input_plain_indices[i] / P] = 1;
    }
    // ...then allocate them in increasing block_p order, which
    // guarantees sorted plain-dim block indices in the result.
    for (index_t block_p = 0; block_p < n_bplain; block_p++) {
      if (occupied[block_p]) {
        blocks[block_p] = result_values + CPD * n_blks;
        result_plain_indices[n_blks] = block_p;
        n_blks++;
        occupied[block_p] = 0; // reset the flag for the next block row
      }
    }
    // Scatter each nonzero of the block row into its destination block.
    for (index_t cb = 0; cb < C; cb++) {
      const index_t c = C * block_c + cb; // compressed dim index
      for (index_t i = input_compressed_indices[c];
           i < input_compressed_indices[c + 1];
           i++) {
        const index_t p = input_plain_indices[i]; // plain dim index
        // Block corresponding to plain dim index
        const index_t block_p = p / P;
        // Plain dim index within block
        const index_t pb = p % P;
        // Each block entry is written at most once: the input is
        // assumed coalesced (sorted, no repeated indices). Scipy adds
        // here instead, because it supports "uncoalesced CSR" input
        // with repeated and unsorted plain-dim indices.
        // Within-block offset: element (cb, pb), stored
        // compressed-major for compressed_rows, plain-major otherwise.
        std::copy(
            input_values + i * D,
            input_values + (i + 1) * D,
            blocks[block_p] +
                (compressed_rows ? P * cb + pb : C * pb + cb) * D);
      }
    }
    result_compressed_indices[block_c + 1] = n_blks;
  }
}
Source
Analyze Your Own Codebase
Get architecture documentation, dependency graphs, and domain analysis for your codebase in minutes.
Try Supermodel Free