Q8GEMMSparse Class — pytorch Architecture
Architecture documentation for the Q8GEMMSparse class in q8gemm_sparse.cc from the pytorch codebase.
Entity Profile
Source Code
aten/src/ATen/native/quantized/cpu/qnnpack/bench/q8gemm_sparse.cc lines 217–365
// Benchmark fixture that prepares the buffers for a block-sparse quantized
// (uint8) GEMM benchmark.
//
// SetUp allocates:
//   a_  : mc * kc uint8 values (random)
//   k_  : nc * kc uint8 values (random, then passed through
//         fillBlockSparseWeights with the configured block size / sparsity)
//   b_  : nc floats (random in [1, 5))
//   c_  : mc * nc floats (filled with 0xA5)
// and builds bcsr_matrix_ from k_ via qnnpack::generateBlockCSRMatrix.
//
// The constructor arguments mr/nr/kr are stored both as mr_/nr_/kr_ and as the
// initial values of mc_/nc_/kc_; rbs/cbs are the row/column block sizes passed
// to the sparsification and BCSR-generation helpers.
class Q8GEMMSparse : public benchmark::Fixture {
 public:
  inline Q8GEMMSparse(
      uint32_t mr, uint32_t nr, uint32_t kr, uint32_t rbs, uint32_t cbs)
      : mr_(mr),
        nr_(nr),
        kr_(kr),
        mc_(mr),
        nc_(nr),
        kc_(kr),
        row_block_size_(rbs),
        col_block_size_(cbs) {}

  // Allocates and fills every buffer used by the benchmark and wires up
  // quantizationParams_.
  void SetUp(const benchmark::State&) override {
    std::random_device randomDevice;
    std::mt19937 rng(randomDevice());

    // uniform_int_distribution is not specified for 8-bit integer types, so
    // draw from a 32-bit distribution restricted to [0, 255] and narrow.
    std::uniform_int_distribution<uint32_t> u8dist(0, 255);
    std::uniform_real_distribution<float> f32dist(1, 5);
    auto u8rng = [&rng, &u8dist]() {
      return static_cast<uint8_t>(u8dist(rng));
    };
    auto f32rng = [&rng, &f32dist]() { return f32dist(rng); };

    // Widen before multiplying so large problem sizes cannot wrap in 32 bits.
    const size_t m = mc();
    const size_t n = nc();
    const size_t k = kc();

    a_.resize(m * k);
    std::generate(a_.begin(), a_.end(), u8rng);

    k_.resize(n * k);

    b_.resize(n);
    std::generate(b_.begin(), b_.end(), f32rng);

    // nc_ rounded up to a multiple of nr_. The mask trick is only a correct
    // round-up when nr_ is a power of two.
    const size_t num_zero_points_kernel = (nc_ + (nr_ - 1)) & -nr_;

    // These buffers are referenced by pointer from quantizationParams_, so
    // they must be members: locals would be destroyed when SetUp returns and
    // leave quantizationParams_ pointing at freed memory.
    kernel_zero_points_.assign(num_zero_points_kernel, 127);
    dequantization_scales_.assign(num_zero_points_kernel, 0.75f);

    std::generate(k_.begin(), k_.end(), u8rng);
    fillBlockSparseWeights(
        k_.data(),
        nc(),
        kc(),
        rowBlockSize(),
        colBlockSize(),
        sparsity(),
        kernel_zero_points_.data());
    bcsr_matrix_ = qnnpack::generateBlockCSRMatrix<uint32_t>(
        k_.data(),
        nc(),
        kc(),
        rowBlockSize(),
        colBlockSize(),
        kernel_zero_points_.data());

    c_.resize(m * n);
    std::fill(c_.begin(), c_.end(), 0xA5);

    quantizationParams_ = pytorch_qnnp_conv_dynamic_quantization_params{
        127,
        kernel_zero_points_.data(),
        dequantization_scales_.data(),
    };
  }

  // Reports 2 * mc * nc * kc items per iteration and empties the data
  // buffers.
  void TearDown(benchmark::State& state) override {
    state.SetItemsProcessed(
        uint64_t(state.iterations()) * 2 * mc() * nc() * kc());
    a_.clear();
    k_.clear();
    b_.clear();
    c_.clear();
    // kernel_zero_points_ and dequantization_scales_ are deliberately left
    // intact so quantizationParams_ never refers to emptied storage; the next
    // SetUp re-assigns both.
  }

  inline const uint8_t* a() const {
    return a_.data();
  }

  inline const uint8_t* k() const {
    return k_.data();
  }

  inline const float* b() const {
    return b_.data();
  }

  inline float* c() {
    return c_.data();
  }

  inline uint32_t mr() const {
    return mr_;
  }

  inline uint32_t mc() const {
    return mc_;
  }

  inline uint32_t nr() const {
    return nr_;
  }

  inline uint32_t nc() const {
    return nc_;
  }

  // nc() passed through roundUp with nr().
  inline uint32_t ncStride() const {
    return roundUp(nc(), nr());
  }

  inline uint32_t kr() const {
    return kr_;
  }

  inline uint32_t kc() const {
    return kc_;
  }

  // kc() passed through roundUp with kr().
  inline uint32_t kcStride() const {
    return roundUp(kc(), kr());
  }

  inline size_t rowBlockSize() const {
    return this->row_block_size_;
  }

  inline size_t colBlockSize() const {
    return this->col_block_size_;
  }

  inline float sparsity() const {
    return this->sparsity_;
  }

  // The returned struct points into kernel_zero_points_ and
  // dequantization_scales_, which stay alive for the lifetime of the fixture.
  inline const pytorch_qnnp_conv_dynamic_quantization_params* quantizationParams()
      const {
    return &quantizationParams_;
  }

 protected:
  std::vector<uint8_t> a_;
  std::vector<uint8_t> k_;
  std::vector<float> b_;
  std::unique_ptr<qnnpack::BCSRMatrix> bcsr_matrix_;
  std::vector<float> c_;
  uint32_t mr_{0};
  uint32_t nr_{0};
  uint32_t kr_{0};
  uint32_t mc_{mr_};
  uint32_t nc_{nr_};
  uint32_t kc_{kr_};
  uint32_t row_block_size_{1};
  uint32_t col_block_size_{4};
  float sparsity_{0.7f};
  pytorch_qnnp_conv_dynamic_quantization_params quantizationParams_;
  // Backing storage for the pointers held in quantizationParams_.
  std::vector<uint8_t> kernel_zero_points_;
  std::vector<float> dequantization_scales_;
};
Source
Analyze Your Own Codebase
Get architecture documentation, dependency graphs, and domain analysis for your codebase in minutes.
Try Supermodel Free