高度にテンプレート化されたC++のコードがどれだけ最適化されるかを確かめてみる。

あるベクトルと行列の積を計算するような関数 product() を作る。引数に、ベクトルや行列を表す様々なものを受け取れるよう、ジェネリックに作ることにする。

そのような場合に、コードが実際どのように最適化されるのかを見てみることにした。

例として、あるベクトルを要素とする対角行列のビューを作るアダプタをこの関数に渡してみると、どうなるか。

#include <cstddef>
#include <cstring>
#include <boost/array.hpp>
#include <boost/mpl/bool.hpp>
#include <boost/mpl/and.hpp>
#include <boost/multi_array.hpp>
#include <boost/type_traits/is_class.hpp>
#include <boost/utility/enable_if.hpp>
#include <boost/assert.hpp>
#include <iostream>

// metafunction to retrieve the element type of the given container
template<typename T_>
struct element_type_of
{
    template<typename TT_>
    struct element_type_cannot_be_retrieved_from_;

    // compile-time assertion
    enum { _ = sizeof(element_type_cannot_be_retrieved_from_<T_>) };
};

template<typename T_, std::size_t N_>
struct element_type_of<boost::array<T_, N_> >
{
    typedef T_ type;
};

template<typename T_, std::size_t N_>
struct element_type_of<T_[N_]>
{
    typedef T_ type;
};

// metafunction to determine if the given type can represent a vector.
template<typename T_>
struct is_vector: boost::mpl::false_ {};

template<typename T_, std::size_t N_>
struct is_vector<boost::array<T_, N_> >: boost::mpl::true_ {};

// metafunction to determine if the given type can represent a square matrix.
template<typename T_>
struct is_matrix: boost::mpl::false_ {};

template<typename T_, std::size_t N_>
struct is_matrix<T_[N_][N_]>: boost::mpl::true_ {};

template<typename T_, std::size_t N_>
struct is_matrix<boost::multi_array<T_, N_> >: boost::mpl::true_ {};

template<typename T_, std::size_t N_>
inline std::size_t size(const T_(&)[N_])
{
    return N_;
}

template<typename T_>
inline typename boost::enable_if<boost::is_class<T_>, std::size_t>::type size(T_ const& x)
{
    return x.size();
}

// an adapter to convert a vector to a diagonal matrix
template<typename Tvec_>
struct const_diagonal_matrix_adapter
{
    typedef typename element_type_of<Tvec_>::type element_type;
    typedef std::size_t size_type;

    struct view
    {

        size_type size() const
        {
            return ::size(vec_);
        }

        element_type const& operator[](size_type idx)
        {
            return idx_ != idx ? zero_: vec_[idx];
        }

        view(Tvec_ const& vec, size_type idx): vec_(vec), idx_(idx) {}

    private:
        static const element_type zero_;
        Tvec_ const& vec_;
        size_type idx_;
    };

    typedef view value_type;

    size_type size() const
    {
        return ::size(vec_);
    }

    value_type operator[](size_type idx) const
    {
        return view(vec_, idx);
    }

    const_diagonal_matrix_adapter(Tvec_ const& vec): vec_(vec) {}

private:
    Tvec_ const& vec_;
};

template<typename Tvec_>
const typename const_diagonal_matrix_adapter<Tvec_>::element_type
const_diagonal_matrix_adapter<Tvec_>::view::zero_ = 0.;


template<typename Tvec_>
inline const_diagonal_matrix_adapter<Tvec_> make_diagonal_matrix(Tvec_ const& vec)
{
    return const_diagonal_matrix_adapter<Tvec_>(vec);
}

// mark const_diagonal_matrix_adapter<> as a square_matrix
template<typename Tvec_>
struct is_matrix<const_diagonal_matrix_adapter<Tvec_> >: boost::mpl::true_ {};

// helper function to create a boost::const_multi_array_ref<> from an array
template<typename T_, std::size_t N1_, std::size_t N2_>
inline boost::const_multi_array_ref<T_, 2>
make_const_multi_array_ref(const T_(&v)[N1_][N2_])
{
    return boost::const_multi_array_ref<T_, 2>(
        reinterpret_cast<T_ const*>(v),
        boost::extents[N1_][N2_]);
}

// calculate the product of a vector and a matrix
template<typename T1, typename T2>
inline T1 product(T1 const& v, T2 const& m, typename boost::enable_if<boost::mpl::and_<is_vector<T1>, is_matrix<T2> > >::type* = 0)
{
    T1 retval;
    BOOST_ASSERT(size(v) == size(m) && size(m) == size(m[0]));
    for (std::size_t i = 0; i < size(v); ++i)
    {
        typename element_type_of<T1>::type x = 0;
        for (std::size_t j = 0; j < size(v); ++j)
        {
            x += v[j] * m[j][i];
        }
        retval[i] = x;
    }
    return retval;
}

// stream helper
template<typename Telem, typename Ttraits, typename Tvec_>
inline typename
boost::enable_if<is_vector<Tvec_>, std::basic_ostream<Telem, Ttraits> >::type&
operator<<(std::basic_ostream<Telem, Ttraits>& strm, Tvec_ const& vec)
{
    strm << "(" << vec[0] << ", " << vec[1] << ", " << vec[2] << ")";
    return strm;
}

int main(int, char**)
{
    boost::array<double, 3> const v1 = { { 1., 2. ,3. } };
    double const v2[3] = { 4., 5., 6. };
    double const m1[3][3] = { { 0., 1., 0. }, { 1., 0., 0. }, { 0., 0., 1. } };
    boost::multi_array<double, 2> const m2(make_const_multi_array_ref(m1));
    std::cout << v1 << "->" << product(v1, m1) << std::endl;;
    std::cout << v1 << "->" << product(v1, m2) << std::endl;;
    std::cout << v1 << "->" << product(v1, make_diagonal_matrix(v2)) << std::endl;
    return 0;
}

これを

g++ -S -O3 -o - templatized_math_operations.cpp | c++filt

としてみたときの結果から抜粋してみる。

まず product, double[3][3]>(boost::array const&, double const(&)[3][3], ...) の方。

	.section	.text._Z8productIN5boost5arrayIdLm3EEEA3_A3_dET_RKS5_RKT0_PNS0_9enable_ifINS0_3mpl4and_I9is_vectorIS5_E9is_matrixIS8_EN4mpl_5bool_ILb1EEESK_SK_EEvE4typeE,"axG",@progbits,boost::array<double, 3ul> product<boost::array<double, 3ul>, double [3][3]>(boost::array<double, 3ul> const&, double const (&) [3][3], boost::enable_if<boost::mpl::and_<is_vector<boost::array<double, 3ul> >, is_matrix<double [3][3]>, mpl_::bool_<true>, mpl_::bool_<true>, mpl_::bool_<true> >, void>::type*),comdat
	.p2align 4,,15
	.weak	boost::array<double, 3ul> product<boost::array<double, 3ul>, double [3][3]>(boost::array<double, 3ul> const&, double const (&) [3][3], boost::enable_if<boost::mpl::and_<is_vector<boost::array<double, 3ul> >, is_matrix<double [3][3]>, mpl_::bool_<true>, mpl_::bool_<true>, mpl_::bool_<true> >, void>::type*)
	.type	boost::array<double, 3ul> product<boost::array<double, 3ul>, double [3][3]>(boost::array<double, 3ul> const&, double const (&) [3][3], boost::enable_if<boost::mpl::and_<is_vector<boost::array<double, 3ul> >, is_matrix<double [3][3]>, mpl_::bool_<true>, mpl_::bool_<true>, mpl_::bool_<true> >, void>::type*), @function
boost::array<double, 3ul> product<boost::array<double, 3ul>, double [3][3]>(boost::array<double, 3ul> const&, double const (&) [3][3], boost::enable_if<boost::mpl::and_<is_vector<boost::array<double, 3ul> >, is_matrix<double [3][3]>, mpl_::bool_<true>, mpl_::bool_<true>, mpl_::bool_<true> >, void>::type*):
.LFB2667:
	.cfi_startproc
	.cfi_personality 0x3,__gxx_personality_v0
	movsd	(%rsi), %xmm0
	movq	%rdi, %rax
	movsd	8(%rsi), %xmm2
	mulsd	(%rdx), %xmm0
	xorpd	%xmm1, %xmm1
	mulsd	24(%rdx), %xmm2
	addsd	%xmm1, %xmm0
	addsd	%xmm2, %xmm0
	movsd	16(%rsi), %xmm2
	mulsd	48(%rdx), %xmm2
	addsd	%xmm2, %xmm0
	movsd	%xmm0, (%rdi)
	movsd	(%rsi), %xmm0
	movsd	8(%rsi), %xmm2
	mulsd	8(%rdx), %xmm0
	mulsd	32(%rdx), %xmm2
	addsd	%xmm1, %xmm0
	addsd	%xmm2, %xmm0
	movsd	16(%rsi), %xmm2
	mulsd	56(%rdx), %xmm2
	addsd	%xmm2, %xmm0
	movsd	%xmm0, 8(%rdi)
	movsd	(%rsi), %xmm0
	mulsd	16(%rdx), %xmm0
	addsd	%xmm1, %xmm0
        # この辺で xmm1 の値を頑張って保持しなくてもいいや、と思ったらしく
        # xmm2 を使うのを止めている。なぜ。
	movsd	8(%rsi), %xmm1
	mulsd	40(%rdx), %xmm1
	addsd	%xmm1, %xmm0
	movsd	16(%rsi), %xmm1
	mulsd	64(%rdx), %xmm1
	addsd	%xmm1, %xmm0
	movsd	%xmm0, 16(%rdi)
	ret
	.cfi_endproc
.LFE2667:
	.size	boost::array<double, 3ul> product<boost::array<double, 3ul>, double [3][3]>(boost::array<double, 3ul> const&, double const (&) [3][3], boost::enable_if<boost::mpl::and_<is_vector<boost::array<double, 3ul> >, is_matrix<double [3][3]>, mpl_::bool_<true>, mpl_::bool_<true>, mpl_::bool_<true> >, void>::type*), .-boost::array<double, 3ul> product<boost::array<double, 3ul>, double [3][3]>(boost::array<double, 3ul> const&, double const (&) [3][3], boost::enable_if<boost::mpl::and_<is_vector<boost::array<double, 3ul> >, is_matrix<double [3][3]>, mpl_::bool_<true>, mpl_::bool_<true>, mpl_::bool_<true> >, void>::type*)

不要な xorpd % と addsd %xmm1, %xmm0 が一部できてしまっているけれども、基本的に真っ直ぐなコードを生成してくれた。

次は、product, boost::multi_array >(boost::array const&, boost::multi_array const&, ...) 。行列の大きさは、コンパイル時には決定できないので、ループの展開に失敗し、済し崩しで分岐の多いコードになっている。

	.section	.text._Z8productIN5boost5arrayIdLm3EEENS0_11multi_arrayIdLm2ESaIdEEEET_RKS6_RKT0_PNS0_9enable_ifINS0_3mpl4and_I9is_vectorIS6_E9is_matrixIS9_EN4mpl_5bool_ILb1EEESL_SL_EEvE4typeE,"axG",@progbits,boost::array<double, 3ul> product<boost::array<double, 3ul>, boost::multi_array<double, 2ul, std::allocator<double> > >(boost::array<double, 3ul> const&, boost::multi_array<double, 2ul, std::allocator<double> > const&, boost::enable_if<boost::mpl::and_<is_vector<boost::array<double, 3ul> >, is_matrix<boost::multi_array<double, 2ul, std::allocator<double> > >, mpl_::bool_<true>, mpl_::bool_<true>, mpl_::bool_<true> >, void>::type*),comdat
	.p2align 4,,15
	.weak	boost::array<double, 3ul> product<boost::array<double, 3ul>, boost::multi_array<double, 2ul, std::allocator<double> > >(boost::array<double, 3ul> const&, boost::multi_array<double, 2ul, std::allocator<double> > const&, boost::enable_if<boost::mpl::and_<is_vector<boost::array<double, 3ul> >, is_matrix<boost::multi_array<double, 2ul, std::allocator<double> > >, mpl_::bool_<true>, mpl_::bool_<true>, mpl_::bool_<true> >, void>::type*)
	.type	boost::array<double, 3ul> product<boost::array<double, 3ul>, boost::multi_array<double, 2ul, std::allocator<double> > >(boost::array<double, 3ul> const&, boost::multi_array<double, 2ul, std::allocator<double> > const&, boost::enable_if<boost::mpl::and_<is_vector<boost::array<double, 3ul> >, is_matrix<boost::multi_array<double, 2ul, std::allocator<double> > >, mpl_::bool_<true>, mpl_::bool_<true>, mpl_::bool_<true> >, void>::type*), @function
boost::array<double, 3ul> product<boost::array<double, 3ul>, boost::multi_array<double, 2ul, std::allocator<double> > >(boost::array<double, 3ul> const&, boost::multi_array<double, 2ul, std::allocator<double> > const&, boost::enable_if<boost::mpl::and_<is_vector<boost::array<double, 3ul> >, is_matrix<boost::multi_array<double, 2ul, std::allocator<double> > >, mpl_::bool_<true>, mpl_::bool_<true>, mpl_::bool_<true> >, void>::type*):
.LFB2670:
	.cfi_startproc
	.cfi_personality 0x3,__gxx_personality_v0
	subq	$8, %rsp
	.cfi_def_cfa_offset 16
	cmpq	$3, 32(%rdx)
	movq	%rdi, %rax
	jne	.L80
	movq	64(%rdx), %r9
	movq	80(%rdx), %rcx
	movq	%r9, %rdi
	salq	$3, %rcx
	addq	(%rdx), %rcx
	negq	%rdi
	testq	%rdi, %rdi
	js	.L88
	cmpq	$2, %rdi
	ja	.L89
	cmpq	$3, 40(%rdx)
	jne	.L80
	movl	$1, %r10d
	movl	$2, %r8d
	movq	72(%rdx), %rdi
	movq	%r10, %r11
	subq	%r9, %r8
	subq	%r9, %r11
	cmpq	$2, %r8
	ja	.L84
	cmpq	$2, %r11
	ja	.L85
	movq	%rdi, %r8
	movsd	(%rsi), %xmm0
	negq	%r8
	testq	%r8, %r8
	js	.L86
	cmpq	$2, %r8
	ja	.L87
	movq	48(%rdx), %r8
	mulsd	(%rcx), %xmm0
	movsd	8(%rsi), %xmm2
	xorpd	%xmm1, %xmm1
	leaq	(%rcx,%r8,8), %r9
	salq	$4, %r8
	subq	%rdi, %r10
	leaq	(%rcx,%r8), %r8
	mulsd	(%r9), %xmm2
	addsd	%xmm1, %xmm0
	addsd	%xmm2, %xmm0
	movsd	16(%rsi), %xmm2
	mulsd	(%r8), %xmm2
	addsd	%xmm2, %xmm0
	movsd	%xmm0, (%rax)
	movsd	(%rsi), %xmm0
	js	.L86
	cmpq	$2, %r10
	ja	.L87
	movq	56(%rdx), %rdx
	movsd	8(%rsi), %xmm2
	movl	$2, %r10d
	subq	%rdi, %r10
	mulsd	(%rcx,%rdx,8), %xmm0
	mulsd	(%r9,%rdx,8), %xmm2
	addsd	%xmm1, %xmm0
	addsd	%xmm2, %xmm0
	movsd	16(%rsi), %xmm2
	mulsd	(%r8,%rdx,8), %xmm2
	addsd	%xmm2, %xmm0
	movsd	%xmm0, 8(%rax)
	movsd	(%rsi), %xmm0
	js	.L86
	cmpq	$2, %r10
	ja	.L87
	salq	$4, %rdx
	mulsd	(%rcx,%rdx), %xmm0
	addsd	%xmm1, %xmm0
	movsd	8(%rsi), %xmm1
	mulsd	(%r9,%rdx), %xmm1
	addsd	%xmm1, %xmm0
	movsd	16(%rsi), %xmm1
	mulsd	(%r8,%rdx), %xmm1
	addsd	%xmm1, %xmm0
	movsd	%xmm0, 16(%rax)
	addq	$8, %rsp
	ret
.L84:
	negq	%rdi
	testq	%rdi, %rdi
	js	.L86
	cmpq	$2, %rdi
	ja	.L87
	testq	%r11, %r11
	.p2align 4,,3
	js	.L88
	cmpq	$2, %r11
	.p2align 4,,5
	ja	.L89
	testq	%r8, %r8
	.p2align 4,,5
	jns	.L89
.L88:
	movl	boost::detail::multi_array::const_sub_array<double, 1ul, double const*> boost::detail::multi_array::value_accessor_n<double, 2ul>::access<boost::detail::multi_array::const_sub_array<double, 1ul, double const*>, double const*>(boost::type<boost::detail::multi_array::const_sub_array<double, 1ul, double const*> >, long, double const*, unsigned long const*, long const*, long const*) const::__PRETTY_FUNCTION__, %ecx
	movl	$135, %edx
	movl	$.LC2, %esi
	movl	$.LC3, %edi
	call	__assert_fail
.L87:
	movl	double const& boost::detail::multi_array::value_accessor_one<double>::access<double const&, double const*>(boost::type<double const&>, long, double const*, unsigned long const*, long const*, long const*) const::__PRETTY_FUNCTION__, %ecx
	movl	$178, %edx
	movl	$.LC2, %esi
	movl	$.LC4, %edi
	call	__assert_fail
.L86:
	movl	double const& boost::detail::multi_array::value_accessor_one<double>::access<double const&, double const*>(boost::type<double const&>, long, double const*, unsigned long const*, long const*, long const*) const::__PRETTY_FUNCTION__, %ecx
	movl	$177, %edx
	movl	$.LC2, %esi
	movl	$.LC3, %edi
	call	__assert_fail
.L85:
	negq	%rdi
	testq	%rdi, %rdi
	js	.L86
	cmpq	$2, %rdi
	ja	.L87
	testq	%r11, %r11
	.p2align 4,,3
	js	.L88
.L89:
	movl	boost::detail::multi_array::const_sub_array<double, 1ul, double const*> boost::detail::multi_array::value_accessor_n<double, 2ul>::access<boost::detail::multi_array::const_sub_array<double, 1ul, double const*>, double const*>(boost::type<boost::detail::multi_array::const_sub_array<double, 1ul, double const*> >, long, double const*, unsigned long const*, long const*, long const*) const::__PRETTY_FUNCTION__, %ecx
	movl	$136, %edx
	movl	$.LC2, %esi
	movl	$.LC4, %edi
	call	__assert_fail
.L80:
	movl	boost::array<double, 3ul> product<boost::array<double, 3ul>, boost::multi_array<double, 2ul, std::allocator<double> > >(boost::array<double, 3ul> const&, boost::multi_array<double, 2ul, std::allocator<double> > const&, boost::enable_if<boost::mpl::and_<is_vector<boost::array<double, 3ul> >, is_matrix<boost::multi_array<double, 2ul, std::allocator<double> > >, mpl_::bool_<true>, mpl_::bool_<true>, mpl_::bool_<true> >, void>::type*)::__PRETTY_FUNCTION__, %ecx
	movl	$140, %edx
	movl	$.LC8, %esi
	movl	$.LC9, %edi
	call	__assert_fail
	.cfi_endproc
.LFE2670:
	.size	boost::array<double, 3ul> product<boost::array<double, 3ul>, boost::multi_array<double, 2ul, std::allocator<double> > >(boost::array<double, 3ul> const&, boost::multi_array<double, 2ul, std::allocator<double> > const&, boost::enable_if<boost::mpl::and_<is_vector<boost::array<double, 3ul> >, is_matrix<boost::multi_array<double, 2ul, std::allocator<double> > >, mpl_::bool_<true>, mpl_::bool_<true>, mpl_::bool_<true> >, void>::type*), .-boost::array<double, 3ul> product<boost::array<double, 3ul>, boost::multi_array<double, 2ul, std::allocator<double> > >(boost::array<double, 3ul> const&, boost::multi_array<double, 2ul, std::allocator<double> > const&, boost::enable_if<boost::mpl::and_<is_vector<boost::array<double, 3ul> >, is_matrix<boost::multi_array<double, 2ul, std::allocator<double> > >, mpl_::bool_<true>, mpl_::bool_<true>, mpl_::bool_<true> >, void>::type*)

そして、最後に、product, const_diagonal_matrix_adapter >(boost::array const&, const_diagonal_matrix_adapter const&, ...) の方。

	.section	.text._Z8productIN5boost5arrayIdLm3EEE29const_diagonal_matrix_adapterIA3_dEET_RKS6_RKT0_PNS0_9enable_ifINS0_3mpl4and_I9is_vectorIS6_E9is_matrixIS9_EN4mpl_5bool_ILb1EEESL_SL_EEvE4typeE,"axG",@progbits,boost::array<double, 3ul> product<boost::array<double, 3ul>, const_diagonal_matrix_adapter<double [3]> >(boost::array<double, 3ul> const&, const_diagonal_matrix_adapter<double [3]> const&, boost::enable_if<boost::mpl::and_<is_vector<boost::array<double, 3ul> >, is_matrix<const_diagonal_matrix_adapter<double [3]> >, mpl_::bool_<true>, mpl_::bool_<true>, mpl_::bool_<true> >, void>::type*),comdat
	.p2align 4,,15
	.weak	boost::array<double, 3ul> product<boost::array<double, 3ul>, const_diagonal_matrix_adapter<double [3]> >(boost::array<double, 3ul> const&, const_diagonal_matrix_adapter<double [3]> const&, boost::enable_if<boost::mpl::and_<is_vector<boost::array<double, 3ul> >, is_matrix<const_diagonal_matrix_adapter<double [3]> >, mpl_::bool_<true>, mpl_::bool_<true>, mpl_::bool_<true> >, void>::type*)
	.type	boost::array<double, 3ul> product<boost::array<double, 3ul>, const_diagonal_matrix_adapter<double [3]> >(boost::array<double, 3ul> const&, const_diagonal_matrix_adapter<double [3]> const&, boost::enable_if<boost::mpl::and_<is_vector<boost::array<double, 3ul> >, is_matrix<const_diagonal_matrix_adapter<double [3]> >, mpl_::bool_<true>, mpl_::bool_<true>, mpl_::bool_<true> >, void>::type*), @function
boost::array<double, 3ul> product<boost::array<double, 3ul>, const_diagonal_matrix_adapter<double [3]> >(boost::array<double, 3ul> const&, const_diagonal_matrix_adapter<double [3]> const&, boost::enable_if<boost::mpl::and_<is_vector<boost::array<double, 3ul> >, is_matrix<const_diagonal_matrix_adapter<double [3]> >, mpl_::bool_<true>, mpl_::bool_<true>, mpl_::bool_<true> >, void>::type*):
.LFB2675:
	.cfi_startproc
	.cfi_personality 0x3,__gxx_personality_v0
	movq	(%rdx), %rdx
	xorpd	%xmm0, %xmm0
	movsd	8(%rsi), %xmm2
	movq	%rdi, %rax
	movsd	(%rdx), %xmm1
	mulsd	%xmm0, %xmm2
	mulsd	(%rsi), %xmm1
	addsd	%xmm0, %xmm1
	addsd	%xmm2, %xmm1
	movsd	16(%rsi), %xmm2
	mulsd	%xmm0, %xmm2
	addsd	%xmm2, %xmm1
	movsd	%xmm1, (%rdi)
	movsd	(%rsi), %xmm1
	movsd	8(%rdx), %xmm2
	mulsd	%xmm0, %xmm1
	mulsd	8(%rsi), %xmm2
	addsd	%xmm0, %xmm1
	addsd	%xmm2, %xmm1
	movsd	16(%rsi), %xmm2
	mulsd	%xmm0, %xmm2
	addsd	%xmm2, %xmm1
	movsd	%xmm1, 8(%rdi)
	movsd	(%rsi), %xmm1
	mulsd	%xmm0, %xmm1
	addsd	%xmm0, %xmm1
	mulsd	8(%rsi), %xmm0
	addsd	%xmm0, %xmm1
	movsd	16(%rdx), %xmm0
	mulsd	16(%rsi), %xmm0
	addsd	%xmm1, %xmm0
	movsd	%xmm0, 16(%rdi)
	ret
	.cfi_endproc
.LFE2675:
	.size	boost::array<double, 3ul> product<boost::array<double, 3ul>, const_diagonal_matrix_adapter<double [3]> >(boost::array<double, 3ul> const&, const_diagonal_matrix_adapter<double [3]> const&, boost::enable_if<boost::mpl::and_<is_vector<boost::array<double, 3ul> >, is_matrix<const_diagonal_matrix_adapter<double [3]> >, mpl_::bool_<true>, mpl_::bool_<true>, mpl_::bool_<true> >, void>::type*), .-boost::array<double, 3ul> product<boost::array<double, 3ul>, const_diagonal_matrix_adapter<double [3]> >(boost::array<double, 3ul> const&, const_diagonal_matrix_adapter<double [3]> const&, boost::enable_if<boost::mpl::and_<is_vector<boost::array<double, 3ul> >, is_matrix<const_diagonal_matrix_adapter<double [3]> >, mpl_::bool_<true>, mpl_::bool_<true>, mpl_::bool_<true> >, void>::type*)

すごい。ループの unroll から始まって、何もかもが展開されている。
正直ここまでやってくれるとは思わなかった。

ただ、ここまでやってくれているのだから、mulsd %xmm0, XXX は削減してほしかったなあ。

コンパイラのバージョン:

gcc version 4.4.1 (Ubuntu 4.4.1-4ubuntu9) 
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy