compressed_rows Class — pytorch Architecture

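Architecture documentation for the `_compressed_to_block_compressed_cpu_kernel` function template in TensorConversions.cpp from the pytorch codebase. (This page is indexed under `compressed_rows`, which is a `bool` template parameter of that function, not a class.)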

Class cpp

Entity Profile

Source Code

aten/src/ATen/native/TensorConversions.cpp lines 1982–2073

// Converts a sparse tensor given in compressed form (compressed indices,
// plain indices, values) into block-compressed form with blocks of C
// elements along the compressed dimension and P elements along the plain
// dimension. Every value carries D trailing dense elements.
//
// Template parameters:
//   index_t         - integer type of the index arrays
//   scalar_t        - element type of the value arrays
//   compressed_rows - selects the layout inside a block: when true, the
//                     element at (cb, pb) is stored at (P * cb + pb) * D,
//                     otherwise at (C * pb + cb) * D
//
// Outputs (all preallocated by the caller):
//   result_compressed_indices - n_compressed / C + 1 entries are written
//   result_plain_indices      - one entry per emitted block, ascending
//                               within each block row/column
//   result_values             - C * P * D elements per emitted block; only
//                               positions that have an input entry are
//                               written, everything else is left untouched
//
// A block is emitted if and only if at least one input entry falls in it.
// Input plain indices are expected to lie in [0, n_plain); this is not
// checked here.
template <class index_t, class scalar_t, bool compressed_rows>
static void _compressed_to_block_compressed_cpu_kernel(
    const index_t n_compressed, // Tensor size along compressed dimension
    const index_t n_plain, // Tensor size along plain dimension
    const index_t C, // Block size along compressed dimensions
    const index_t P, // Block size along plain dimension
    const index_t D, // Number of elements in dense dimensions
    const index_t* input_compressed_indices,
    const index_t* input_plain_indices,
    const scalar_t* input_values,
    index_t* result_compressed_indices,
    index_t* result_plain_indices,
    scalar_t* result_values) {
  assert(n_compressed % C == 0);
  assert(n_plain % P == 0);

  // Number of blocks along compressed dim
  const index_t n_bcompressed = n_compressed / C;
  // Number of blocks along plain dim
  const index_t n_bplain = n_plain / P;

  // Number of elements per block. Offsets into the value buffers are
  // computed in a 64-bit type so that they cannot overflow a narrower
  // index_t when the buffers are large.
  const long long block_numel = static_cast<long long>(C) * P * D;

  // Destination of each plain block within the current block row/column
  // (one spare slot, as in the original allocation).
  std::vector<scalar_t*> blocks(n_bplain + 1, nullptr);
  // occupied[b] != 0 while plain block b is known to hold at least one
  // entry of the block row/column being processed.
  std::vector<char> occupied(n_bplain + 1);

  // Number of blocks emitted so far
  index_t n_blks = 0;

  result_compressed_indices[0] = 0;

  // Iterate over blocks along compressed dim
  for (index_t block_c = 0; block_c < n_bcompressed; block_c++) {
    const index_t first = input_compressed_indices[C * block_c];
    const index_t last = input_compressed_indices[C * (block_c + 1)];

    // Pass 1: mark every plain block touched by this block row/column.
    // One linear pass replaces re-scanning all entries per plain block.
    for (index_t i = first; i < last; i++) {
      occupied[input_plain_indices[i] / P] = 1;
    }

    // Pass 2: hand out storage in ascending plain-block order, which
    // guarantees sorted plain dim indices. Marks are cleared on the way
    // so the scratch vector is ready for the next iteration.
    for (index_t block_p = 0; block_p < n_bplain; block_p++) {
      if (occupied[block_p]) {
        occupied[block_p] = 0;
        blocks[block_p] = result_values + block_numel * n_blks;
        result_plain_indices[n_blks] = block_p;
        n_blks++;
      }
    }

    // Iterate over compressed dim within block
    for (index_t cb = 0; cb < C; cb++) {
      const index_t c = C * block_c + cb; // compressed dim index
      for (index_t i = input_compressed_indices[c];
           i < input_compressed_indices[c + 1];
           i++) {
        const index_t p = input_plain_indices[i]; // plain dim index

        // Block corresponding to plain dim index
        const index_t block_p = p / P;
        // Plain dim index within block
        const index_t pb = p % P;

        // Position of the element inside its block, in units of D
        const long long within_block =
            compressed_rows ? P * cb + pb : C * pb + cb;
        const long long src = static_cast<long long>(i) * D;

        // Specific block entries should not be visited more than once,
        // so a plain copy suffices. (The Scipy code this derives from
        // accumulates here instead, presumably because it accepts
        // uncoalesced input with repeated or unsorted indices.)
        std::copy(
            input_values + src,
            input_values + src + D,
            blocks[block_p] + within_block * D);
      }
    }

    // Stale pointers left in `blocks` are harmless: every slot that is
    // read above was reassigned in pass 2 of this same iteration.
    result_compressed_indices[block_c + 1] = n_blks;
  }
}

Source

View on GitHub

Analyze Your Own Codebase

Get architecture documentation, dependency graphs, and domain analysis for your codebase in minutes.

Try Supermodel Free