Skip to content

Commit

Permalink
issue p12tic#115 Proof of concept
Browse files Browse the repository at this point in the history
* Add google benchmark as ExternalProject
* Add three bench suites: transform "unary", reduce "unary", and load/store

Todo:
* Strange behavior in the transform bench suite: STD seems faster than SIMD on MSVC2017 <--- to be checked on gcc > 5
* add other cases
  • Loading branch information
ThomasRetornaz committed Jul 16, 2018
1 parent 9a3636a commit 6ae2a4a
Show file tree
Hide file tree
Showing 12 changed files with 513 additions and 4 deletions.
5 changes: 5 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -57,5 +57,10 @@ install(FILES

# Enable CTest-based testing for this directory and below.
enable_testing()

# Opt-in switch for the benchmark suite; defaults to OFF.
option(ENABLE_BENCH "Set to on in order to compile bench suite, work only in release mode" OFF)

add_subdirectory(simdpp)
add_subdirectory(test)
# The bench directory is only configured when ENABLE_BENCH is turned on.
if(ENABLE_BENCH)
add_subdirectory(bench)
endif()
2 changes: 2 additions & 0 deletions bench/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Third-party dependencies are configured first, then the instruction benchmarks.
# NOTE(review): bench/insn reads GOOGLE_BENCHMARK_INCLUDE_DIRS and GOOGLE_BENCHMARK,
# presumably defined by the thirdparty directory - confirm before reordering.
add_subdirectory(thirdparty)
add_subdirectory(insn)
36 changes: 36 additions & 0 deletions bench/insn/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Builds one benchmark executable per entry of COMPILABLE_ARCHS, named
# bench_insn_<SUFFIX>, compiled with the flags reported by simdpp_get_arch_info
# and linked against the "benchmark" library.
# NOTE(review): COMPILABLE_ARCHS, simdpp_get_arch_info, GOOGLE_BENCHMARK and
# GOOGLE_BENCHMARK_INCLUDE_DIRS are defined outside this file - confirm their origin.
include_directories(${libsimdpp_SOURCE_DIR})
include_directories(${GOOGLE_BENCHMARK_INCLUDE_DIRS})

# Sources shared by every benchmark executable (entry point).
set(TEST_BENCH_SOURCES
    main.cc
    main.h
)

# Benchmark suites compiled once per architecture.
set(BENCH_INSN_ARCH_SOURCES
    algorithm/transform_unary.cc
    algorithm/reduce_unary.cc
    load_store.cc
)

set_property(GLOBAL PROPERTY USE_FOLDERS ON)
# The previous "${COMPILABLE_ARCHS}}" had a stray closing brace, which appended
# a literal "}" to the last list element and corrupted that architecture id.
foreach(ARCH ${COMPILABLE_ARCHS})
    simdpp_get_arch_info(CXX_FLAGS DEFINES_LIST SUFFIX ${ARCH})
    #message("Create benchmark for arch : ${SUFFIX} with flags: ${CXX_FLAGS} with defines ${DEFINES_LIST}")
    set(exename "bench_insn_${SUFFIX}")
    add_executable(${exename} ${BENCH_INSN_ARCH_SOURCES} ${TEST_BENCH_SOURCES})
    set_target_properties(${exename} PROPERTIES COMPILE_FLAGS "${CXX_FLAGS}")
    set_target_properties(${exename} PROPERTIES FOLDER bench)
    if(WIN32)
        # shlwapi is linked in addition to benchmark on Windows.
        target_link_libraries(${exename}
            PUBLIC benchmark
            PUBLIC shlwapi.lib
        )
    else()
        target_link_libraries(${exename}
            PUBLIC benchmark
        )
        # NOTE(review): the dependency on ${GOOGLE_BENCHMARK} is only declared on
        # non-Windows platforms - confirm that this asymmetry is intended.
        add_dependencies(${exename} ${GOOGLE_BENCHMARK})
    endif()
endforeach()


122 changes: 122 additions & 0 deletions bench/insn/algorithm/reduce_unary.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
/* Copyright (C) 2018 Povilas Kanapickas <[email protected]>
Copyright (C) 2018 Thomas Retornaz <[email protected]>
Distributed under the Boost Software License, Version 1.0.
(See accompanying file LICENSE_1_0.txt or copy at
http://www.boost.org/LICENSE_1_0.txt)
*/

#include "benchmark/benchmark.h"
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <numeric>
#include <vector>
#include <simdpp/simd.h>
//algorithm
#include <simdpp/algorithm/reduce.h>


namespace {

// Nullary functor that returns the same stored value on every call.
// Usable as the generator argument of fill-by-call algorithms.
template <typename T>
struct GeneratorConstant
{
    // Stores `constant`; member-initialised so T need not be default-constructible.
    GeneratorConstant(T constant) : m_constant(constant) {}

    // Returns a copy of the stored value; const so const generators are callable.
    T operator()() const { return m_constant; }

    T m_constant;
};


template<typename T, class Generator>
std::vector<T, simdpp::aligned_allocator<T, simdpp::simd_traits<T>::alignment>> DataGenerator(std::size_t size, Generator gen)
{

using vector_aligned_t = std::vector<T, simdpp::aligned_allocator<T, simdpp::simd_traits<T>::alignment>>;
vector_aligned_t input(size);
std::generate(input.begin(), input.end(), gen);
return input;
}

/*********************UNARY****************************/

// Benchmark fixture that owns the input buffer used by the reduce benchmarks.
// SetUp() rebuilds the buffer with st.range(0) elements, each equal to T(1);
// TearDown() empties it again.
template<typename T>
class ReduceUnaryFixture : public ::benchmark::Fixture {
public:
    using vector_aligned_t = std::vector<T, simdpp::aligned_allocator<T, simdpp::simd_traits<T>::alignment>>;

    // Fills m_inputvect according to the benchmark's first range argument.
    void SetUp(const ::benchmark::State& st) override
    {
        const auto count = static_cast<std::size_t>(st.range(0));
        m_inputvect = DataGenerator<T, GeneratorConstant<T>>(count, GeneratorConstant<T>(1));
    }

    // Removes the elements produced by SetUp().
    void TearDown(const ::benchmark::State&) override
    {
        m_inputvect.clear();
    }

    vector_aligned_t m_inputvect;
};

//UINT64_T
// Times simdpp::reduce over the whole fixture buffer, starting from a uint64_t zero.
BENCHMARK_TEMPLATE_DEFINE_F(ReduceUnaryFixture, UnaryUNINT64_SIMD_Test, uint64_t)(benchmark::State& st)
{
    while (st.KeepRunning())
    {
        // DoNotOptimize keeps the otherwise unused result from being optimised away.
        benchmark::DoNotOptimize(simdpp::reduce(m_inputvect.data(), m_inputvect.data() + m_inputvect.size(), static_cast<uint64_t>(0)));
    }
}
BENCHMARK_REGISTER_F(ReduceUnaryFixture, UnaryUNINT64_SIMD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);

// Reference measurement: std::accumulate over the same buffer, starting from a uint64_t zero.
BENCHMARK_TEMPLATE_DEFINE_F(ReduceUnaryFixture, UnaryUNINT64_STD_Test, uint64_t)(benchmark::State& st)
{
    while (st.KeepRunning())
    {
        // DoNotOptimize keeps the otherwise unused result from being optimised away.
        benchmark::DoNotOptimize(std::accumulate(m_inputvect.begin(), m_inputvect.end(), static_cast<uint64_t>(0)));
    }
}
BENCHMARK_REGISTER_F(ReduceUnaryFixture, UnaryUNINT64_STD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);


//FLOAT
// Times simdpp::reduce over the whole fixture buffer, starting from a float zero.
BENCHMARK_TEMPLATE_DEFINE_F(ReduceUnaryFixture, UnaryFLOAT_SIMD_Test, float)(benchmark::State& st)
{
    while (st.KeepRunning())
    {
        // DoNotOptimize keeps the otherwise unused result from being optimised away.
        benchmark::DoNotOptimize(simdpp::reduce(m_inputvect.data(), m_inputvect.data() + m_inputvect.size(), 0.0f));
    }
}
BENCHMARK_REGISTER_F(ReduceUnaryFixture, UnaryFLOAT_SIMD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);

// Reference measurement: std::accumulate over the same buffer, starting from a float zero.
BENCHMARK_TEMPLATE_DEFINE_F(ReduceUnaryFixture, UnaryFLOAT_STD_Test, float)(benchmark::State& st)
{
    while (st.KeepRunning())
    {
        // DoNotOptimize keeps the otherwise unused result from being optimised away.
        benchmark::DoNotOptimize(std::accumulate(m_inputvect.begin(), m_inputvect.end(), 0.0f));
    }
}
BENCHMARK_REGISTER_F(ReduceUnaryFixture, UnaryFLOAT_STD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);

//DOUBLE
// Times simdpp::reduce over the whole fixture buffer, starting from a double zero.
BENCHMARK_TEMPLATE_DEFINE_F(ReduceUnaryFixture, UnaryDOUBLE_SIMD_Test, double)(benchmark::State& st)
{
    while (st.KeepRunning())
    {
        // DoNotOptimize keeps the otherwise unused result from being optimised away.
        benchmark::DoNotOptimize(simdpp::reduce(m_inputvect.data(), m_inputvect.data() + m_inputvect.size(), 0.0));
    }
}
BENCHMARK_REGISTER_F(ReduceUnaryFixture, UnaryDOUBLE_SIMD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);

// Reference measurement: std::accumulate over the same buffer, starting from a double zero.
BENCHMARK_TEMPLATE_DEFINE_F(ReduceUnaryFixture, UnaryDOUBLE_STD_Test, double)(benchmark::State& st)
{
    while (st.KeepRunning())
    {
        // DoNotOptimize keeps the otherwise unused result from being optimised away.
        benchmark::DoNotOptimize(std::accumulate(m_inputvect.begin(), m_inputvect.end(), 0.0));
    }
}
BENCHMARK_REGISTER_F(ReduceUnaryFixture, UnaryDOUBLE_STD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);



} // namespace
205 changes: 205 additions & 0 deletions bench/insn/algorithm/transform_unary.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,205 @@
/* Copyright (C) 2018 Povilas Kanapickas <[email protected]>
Copyright (C) 2018 Thomas Retornaz <[email protected]>
Distributed under the Boost Software License, Version 1.0.
(See accompanying file LICENSE_1_0.txt or copy at
http://www.boost.org/LICENSE_1_0.txt)
*/

#include "benchmark/benchmark.h"
#include <vector>
#include <algorithm>
#include <iterator>
#include <simdpp/simd.h>
//algorithm
#include <simdpp/algorithm/transform.h>


namespace {

// Unary functor that adds a fixed value to its argument.
// The non-template overload handles an argument of type T with operator+;
// the templated overload passes any other argument type to simdpp::add.
template<typename T>
struct UnaryOpAddValue
{
    T m_val;
public:
    UnaryOpAddValue(T increment) : m_val(increment) {}

    // Scalar path: the sum is converted back to T.
    SIMDPP_INL T operator()(T const &scalar) const SIMDPP_NOEXCEPT
    {
        return static_cast<T>(m_val + scalar);
    }

    // Generic path (presumably SIMD vector arguments - confirm against simdpp::transform).
    template<typename U>
    SIMDPP_INL U operator()(U const &operand) const SIMDPP_NOEXCEPT
    {
        return simdpp::add(m_val, operand);
    }
};

// Nullary functor that returns the same stored value on every call.
// Usable as the generator argument of fill-by-call algorithms.
template <typename T>
struct GeneratorConstant
{
    // Stores `constant`; member-initialised so T need not be default-constructible.
    GeneratorConstant(T constant) : m_constant(constant) {}

    // Returns a copy of the stored value; const so const generators are callable.
    T operator()() const { return m_constant; }

    T m_constant;
};


template<typename T, class Generator>
std::vector<T, simdpp::aligned_allocator<T, simdpp::simd_traits<T>::alignment>> DataGenerator(std::size_t size, Generator gen)
{

using vector_aligned_t = std::vector<T, simdpp::aligned_allocator<T, simdpp::simd_traits<T>::alignment>>;
vector_aligned_t input(size);
std::generate(input.begin(), input.end(), gen);
return input;
}

/*********************UNARY****************************/

// Benchmark fixture that owns the input and output buffers of the transform benchmarks.
// SetUp() sizes both buffers to st.range(0) and fills the input with T(42);
// TearDown() empties both. opPlusOne is the functor applied by the benchmarks.
template<typename T>
class TransformUnaryFixture : public ::benchmark::Fixture {
public:
    using vector_aligned_t = std::vector<T, simdpp::aligned_allocator<T, simdpp::simd_traits<T>::alignment>>;

    // Prepares input and output buffers for the benchmark's first range argument.
    void SetUp(const ::benchmark::State& st) override
    {
        // Convert the range once so both buffers are guaranteed to share one size.
        const auto count = static_cast<std::size_t>(st.range(0));
        m_inputvect = DataGenerator<T, GeneratorConstant<T>>(count, GeneratorConstant<T>(42));
        m_outputvect.resize(count);
    }

    // Removes the elements created by SetUp().
    void TearDown(const ::benchmark::State&) override
    {
        m_inputvect.clear();
        m_outputvect.clear();
    }

    vector_aligned_t m_inputvect;
    vector_aligned_t m_outputvect;
    UnaryOpAddValue<T> opPlusOne = UnaryOpAddValue<T>(1);
};

//UINT8_T
// Times simdpp::transform applying opPlusOne to the uint8_t input buffer.
BENCHMARK_TEMPLATE_DEFINE_F(TransformUnaryFixture, UnaryUNINT8_SIMD_Test, uint8_t)(benchmark::State& st)
{
    while (st.KeepRunning())
    {
        // DoNotOptimize keeps the call's result observable to the optimiser.
        benchmark::DoNotOptimize(simdpp::transform(m_inputvect.data(), m_inputvect.data() + m_inputvect.size(), m_outputvect.data(), opPlusOne));
    }
}
BENCHMARK_REGISTER_F(TransformUnaryFixture, UnaryUNINT8_SIMD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);

// Reference measurement: std::transform applying opPlusOne to the uint8_t input buffer.
BENCHMARK_TEMPLATE_DEFINE_F(TransformUnaryFixture, UnaryUNINT8_STD_Test, uint8_t)(benchmark::State& st)
{
    while (st.KeepRunning())
    {
        // DoNotOptimize keeps the call's result observable to the optimiser.
        benchmark::DoNotOptimize(std::transform(m_inputvect.begin(), m_inputvect.end(), m_outputvect.begin(), opPlusOne));
    }
}
BENCHMARK_REGISTER_F(TransformUnaryFixture, UnaryUNINT8_STD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);

//UINT16_T
// Times simdpp::transform applying opPlusOne to the uint16_t input buffer.
BENCHMARK_TEMPLATE_DEFINE_F(TransformUnaryFixture, UnaryUNINT16_SIMD_Test, uint16_t)(benchmark::State& st)
{
    while (st.KeepRunning())
    {
        // Result goes through DoNotOptimize, as in the UINT8 benchmarks, so every
        // element type is measured under the same conditions.
        benchmark::DoNotOptimize(simdpp::transform(m_inputvect.data(), m_inputvect.data() + m_inputvect.size(), m_outputvect.data(), opPlusOne));
    }
}
BENCHMARK_REGISTER_F(TransformUnaryFixture, UnaryUNINT16_SIMD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);

// Reference measurement: std::transform applying opPlusOne to the uint16_t input buffer.
BENCHMARK_TEMPLATE_DEFINE_F(TransformUnaryFixture, UnaryUNINT16_STD_Test, uint16_t)(benchmark::State& st)
{
    while (st.KeepRunning())
    {
        // Result goes through DoNotOptimize, as in the UINT8 benchmarks, so every
        // element type is measured under the same conditions.
        benchmark::DoNotOptimize(std::transform(m_inputvect.begin(), m_inputvect.end(), m_outputvect.begin(), opPlusOne));
    }
}
BENCHMARK_REGISTER_F(TransformUnaryFixture, UnaryUNINT16_STD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);

//UINT32_T
// Times simdpp::transform applying opPlusOne to the uint32_t input buffer.
BENCHMARK_TEMPLATE_DEFINE_F(TransformUnaryFixture, UnaryUNINT32_SIMD_Test, uint32_t)(benchmark::State& st)
{
    while (st.KeepRunning())
    {
        // Result goes through DoNotOptimize, as in the UINT8 benchmarks, so every
        // element type is measured under the same conditions.
        benchmark::DoNotOptimize(simdpp::transform(m_inputvect.data(), m_inputvect.data() + m_inputvect.size(), m_outputvect.data(), opPlusOne));
    }
}
BENCHMARK_REGISTER_F(TransformUnaryFixture, UnaryUNINT32_SIMD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);

// Reference measurement: std::transform applying opPlusOne to the uint32_t input buffer.
BENCHMARK_TEMPLATE_DEFINE_F(TransformUnaryFixture, UnaryUNINT32_STD_Test, uint32_t)(benchmark::State& st)
{
    while (st.KeepRunning())
    {
        // Result goes through DoNotOptimize, as in the UINT8 benchmarks, so every
        // element type is measured under the same conditions.
        benchmark::DoNotOptimize(std::transform(m_inputvect.begin(), m_inputvect.end(), m_outputvect.begin(), opPlusOne));
    }
}
// Sizes match the SIMD counterpart (32, not 31) so both variants are compared on identical inputs.
BENCHMARK_REGISTER_F(TransformUnaryFixture, UnaryUNINT32_STD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);

//UINT64_T
// Times simdpp::transform applying opPlusOne to the uint64_t input buffer.
BENCHMARK_TEMPLATE_DEFINE_F(TransformUnaryFixture, UnaryUNINT64_SIMD_Test, uint64_t)(benchmark::State& st)
{
    while (st.KeepRunning())
    {
        // Result goes through DoNotOptimize, as in the UINT8 benchmarks, so every
        // element type is measured under the same conditions.
        benchmark::DoNotOptimize(simdpp::transform(m_inputvect.data(), m_inputvect.data() + m_inputvect.size(), m_outputvect.data(), opPlusOne));
    }
}
BENCHMARK_REGISTER_F(TransformUnaryFixture, UnaryUNINT64_SIMD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);

// Reference measurement: std::transform applying opPlusOne to the uint64_t input buffer.
BENCHMARK_TEMPLATE_DEFINE_F(TransformUnaryFixture, UnaryUNINT64_STD_Test, uint64_t)(benchmark::State& st)
{
    while (st.KeepRunning())
    {
        // Result goes through DoNotOptimize, as in the UINT8 benchmarks, so every
        // element type is measured under the same conditions.
        benchmark::DoNotOptimize(std::transform(m_inputvect.begin(), m_inputvect.end(), m_outputvect.begin(), opPlusOne));
    }
}
BENCHMARK_REGISTER_F(TransformUnaryFixture, UnaryUNINT64_STD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);

//FLOAT
// Times simdpp::transform applying opPlusOne to the float input buffer.
BENCHMARK_TEMPLATE_DEFINE_F(TransformUnaryFixture, UnaryFloat_SIMD_Test, float)(benchmark::State& st)
{
    while (st.KeepRunning())
    {
        // Result goes through DoNotOptimize, as in the UINT8 benchmarks, so every
        // element type is measured under the same conditions.
        benchmark::DoNotOptimize(simdpp::transform(m_inputvect.data(), m_inputvect.data() + m_inputvect.size(), m_outputvect.data(), opPlusOne));
    }
}
BENCHMARK_REGISTER_F(TransformUnaryFixture, UnaryFloat_SIMD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);


// Reference measurement: std::transform applying opPlusOne to the float input buffer.
BENCHMARK_TEMPLATE_DEFINE_F(TransformUnaryFixture, UnaryFloat_STD_Test, float)(benchmark::State& st)
{
    while (st.KeepRunning())
    {
        // Result goes through DoNotOptimize, as in the UINT8 benchmarks, so every
        // element type is measured under the same conditions.
        benchmark::DoNotOptimize(std::transform(m_inputvect.begin(), m_inputvect.end(), m_outputvect.begin(), opPlusOne));
    }
}
BENCHMARK_REGISTER_F(TransformUnaryFixture, UnaryFloat_STD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);

//DOUBLE
// Times simdpp::transform applying opPlusOne to the double input buffer.
BENCHMARK_TEMPLATE_DEFINE_F(TransformUnaryFixture, UnaryDouble_SIMD_Test, double)(benchmark::State& st)
{
    while (st.KeepRunning())
    {
        // Result goes through DoNotOptimize, as in the UINT8 benchmarks, so every
        // element type is measured under the same conditions.
        benchmark::DoNotOptimize(simdpp::transform(m_inputvect.data(), m_inputvect.data() + m_inputvect.size(), m_outputvect.data(), opPlusOne));
    }
}
BENCHMARK_REGISTER_F(TransformUnaryFixture, UnaryDouble_SIMD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);

// Reference measurement: std::transform applying opPlusOne to the double input buffer.
BENCHMARK_TEMPLATE_DEFINE_F(TransformUnaryFixture, UnaryDouble_STD_Test, double)(benchmark::State& st)
{
    while (st.KeepRunning())
    {
        // Result goes through DoNotOptimize, as in the UINT8 benchmarks, so every
        // element type is measured under the same conditions.
        benchmark::DoNotOptimize(std::transform(m_inputvect.begin(), m_inputvect.end(), m_outputvect.begin(), opPlusOne));
    }
}
BENCHMARK_REGISTER_F(TransformUnaryFixture, UnaryDouble_STD_Test)->Arg(1)->Arg(10)->Arg(32)->Arg(100)->Arg(1000)->Arg(10000);

} // namespace
Loading

0 comments on commit 6ae2a4a

Please sign in to comment.