Home / Class / local_division Class — pytorch Architecture

local_division Class — pytorch Architecture

Architecture documentation for the local_division function template (the overload taking Complex<T> arguments) in vec_test_all_types.h from the PyTorch codebase.

Entity Profile

Source Code

aten/src/ATen/test/vec_test_all_types.h lines 1321–1389

// Scalar complex division x / y for Complex<T> operands.
//
// With x = a + bi and y = c + di, the non-default paths compute
//   re = (ac + bd) / (c^2 + d^2)
//   im = (bc - ad) / (c^2 + d^2)
// using a different sequence of multiplies/adds depending on which
// CPU_CAPABILITY_* macro is defined. The overload participates only when
// is_complex<Complex<T>>::value is true.
//
// NOTE(review): presumably each branch mirrors the arithmetic order of the
// corresponding vectorized implementation so that results can be compared
// against it — confirm against the callers in this test header.
//
// x: dividend; y: divisor. Returns Complex<T>(re, im). No special handling of
// a zero divisor is done in any branch.
template <typename T>
std::enable_if_t<is_complex<Complex<T>>::value, Complex<T>> local_division(Complex<T> x, Complex<T> y) {
#if defined(TEST_AGAINST_DEFAULT)
    // Defer entirely to Complex<T>'s own operator/.
    return x / y;
#else /* defined(TEST_AGAINST_DEFAULT) */
    // re = (ac + bd) / abs_2(y)
    // im = (bc - ad) / abs_2(y)
    // where abs_2(y) = c^2 + d^2
    T x_real = x.real();
    T x_imag = x.imag();
    T y_real = y.real();
    T y_imag = y.imag();
    // NOTE(review): PreventFma is declared elsewhere; presumably its add/sub
    // keep the compiler from contracting a multiply and the following
    // add/sub into a single fused multiply-add — confirm.
    PreventFma noFma;
#if defined(CPU_CAPABILITY_ZVECTOR)
    // Scaled variant: every component is first multiplied by
    // 1 / sc, with sc = max(|c|, |d|), and the quotient is formed from the
    // scaled values. The sc^2 factors cancel in the final divisions.
    T abs_c = std::abs(y_real);
    T abs_d = std::abs(y_imag);
    T scale = 1.0 / std::max(abs_c, abs_d); // 1/sc

    T a_sc = x_real * scale; // a/sc
    T b_sc = x_imag * scale; // b/sc
    T c_sc = y_real * scale; // c/sc
    T d_sc = y_imag * scale; // d/sc

    T ac_sc2 = a_sc * c_sc; // ac/sc^2
    T bd_sc2 = b_sc * d_sc; // bd/sc^2

    T neg_d_sc = -1.0 * d_sc; // -d/sc

    T neg_ad_sc2 = a_sc * neg_d_sc; // -ad/sc^2
    T bc_sc2 = b_sc * c_sc; // bc/sc^2

    T ac_bd_sc2 = noFma.add(ac_sc2, bd_sc2); // (ac+bd)/sc^2
    T bc_ad_sc2 = noFma.add(bc_sc2, neg_ad_sc2); // (bc-ad)/sc^2

    T c2_sc2 = c_sc * c_sc; // c^2/sc^2
    T d2_sc2 = d_sc * d_sc; // d^2/sc^2

    T c2_d2_sc2 = noFma.add(c2_sc2, d2_sc2); // (c^2+d^2)/sc^2

    T rr = ac_bd_sc2 / c2_d2_sc2; // (ac+bd)/(c^2+d^2)
    T ii = bc_ad_sc2 / c2_d2_sc2; // (bc-ad)/(c^2+d^2)

    return Complex<T>(rr, ii);
#else /* defined(CPU_CAPABILITY_ZVECTOR) */
#if defined(CPU_CAPABILITY_VSX)
    // Numerators via one plain multiply followed by one fma each:
    //   rr = fma(b, d, a*c)   -> ac + bd
    //   ii = fma(a, -d, b*c)  -> bc - ad
    // (original note: check multiplication considering swap and fma)
    T rr = x_real * y_real;
    T ii = x_imag * y_real;
    T neg_imag = -y_imag;
    rr = fma(x_imag, y_imag, rr);
    ii = fma(x_real, neg_imag, ii);
#else /* defined(CPU_CAPABILITY_VSX) */
    // Numerators from four separate products combined through noFma.
    T ac = x_real * y_real;
    T bd = x_imag * y_imag;
    T ad = x_real * y_imag;
    T bc = x_imag * y_real;
    T rr = noFma.add(ac, bd); // ac + bd
    T ii = noFma.sub(bc, ad); // bc - ad
#endif /* defined(CPU_CAPABILITY_VSX) */
    // Shared by the VSX and generic paths: abs_2(y) = c^2 + d^2, then divide
    // both numerators by it.
    T abs_rr = y_real * y_real;
    T abs_ii = y_imag * y_imag;
    T abs_2 = noFma.add(abs_rr, abs_ii);
    rr = rr / abs_2;
    ii = ii / abs_2;
    return Complex<T>(rr, ii);
#endif /* defined(CPU_CAPABILITY_ZVECTOR) */
#endif /* defined(TEST_AGAINST_DEFAULT) */
}

Analyze Your Own Codebase

Get architecture documentation, dependency graphs, and domain analysis for your codebase in minutes.

Try Supermodel Free