convert_to_fp_of_same_size Class — pytorch Architecture

Architecture documentation for the convert_to_fp_of_same_size<double> function template specialization in vec256.h from the PyTorch codebase.

Class c

Entity Profile

Source Code

aten/src/ATen/cpu/vec/vec256/vec256.h lines 174–193

// Converts every 64-bit integer lane of `src` to a double without a native
// int64 -> double vector instruction.
//
// Each lane is split into its 32-bit halves. Every half is dropped into the
// low mantissa bits of a double whose upper bits are preset to a known bias,
// so that reinterpreting the bits as a double yields "half + bias". The biases
// are then removed with one floating-point subtract followed by one add:
//
//   int64 = low32 + high32 * 2^32
//         = (hi_biased - (2^84 + 2^63 + 2^52)) + lo_biased
//
// The evaluation order "(hi - bias) + lo" is kept exactly as written;
// floating-point addition is not associative, so regrouping these operations
// could change the rounding of the result.
template <>
Vectorized<double> inline convert_to_fp_of_same_size<double>(
    const Vectorized<int64_t>& src) {
  /* Bit pattern of the double 2^52. */
  const __m256i bias_lo_bits = _mm256_set1_epi64x(0x4330000000000000);
  /* Bit pattern of the double 2^84 + 2^63. */
  const __m256i bias_hi_bits = _mm256_set1_epi64x(0x4530000080000000);
  /* The double 2^84 + 2^63 + 2^52: sum of every bias introduced below. */
  const __m256d bias_total =
      _mm256_castsi256_pd(_mm256_set1_epi64x(0x4530000080100000));

  /* Move the upper 32 bits of each lane down, then XOR with the bias pattern.
     The logical shift leaves the upper dword zero, so the XOR both installs
     the 0x45300000 upper bits and flips bit 31 of the shifted half:
     hi_biased = high32 * 2^32 + 2^84 + 2^63 */
  const __m256i hi_biased =
      _mm256_xor_si256(_mm256_srli_epi64(src, 32), bias_hi_bits);

  /* Keep the low dword of each lane from src and take the high dword from
     the 2^52 pattern: lo_biased = low32 + 2^52 */
  const __m256i lo_biased = _mm256_blend_epi32(bias_lo_bits, src, 0b01010101);

  /* Strip all biases at once from the high part, then add the low part. */
  const __m256d hi_unbiased =
      _mm256_sub_pd(_mm256_castsi256_pd(hi_biased), bias_total);
  return _mm256_add_pd(hi_unbiased, _mm256_castsi256_pd(lo_biased));
}

Source

View on GitHub

Analyze Your Own Codebase

Get architecture documentation, dependency graphs, and domain analysis for your codebase in minutes.

Try Supermodel Free