Spaces:
Runtime error
Runtime error
| // We don't use THRUST_PP_STRINGIZE and THRUST_PP_CAT because they are new, and | |
| // we want this benchmark to be backwards-compatible to older versions of Thrust. | |
| // We don't use THRUST_NOEXCEPT because it's new, and we want this benchmark to | |
| // be backwards-compatible to older versions of Thrust. | |
| /////////////////////////////////////////////////////////////////////////////// | |
// Unary functor that returns the squared distance between its argument and a
// fixed `average` captured at construction time.
template <typename T>
struct squared_difference
{
private:
  T const average; // Value subtracted from each input before squaring.

public:
  // Capture the average to measure against.
  __host__ __device__
  squared_difference(T average_) : average(average_) {}

  // Copy the captured average from `rhs`.
  __host__ __device__
  squared_difference(squared_difference const& rhs) : average(rhs.average) {}

  // Return `(x - average) ^ 2`.
  __host__ __device__
  T operator()(T x) const
  {
    T const delta = x - average;
    return delta * delta;
  }
};
// A running value paired with the number of elements that contributed to it.
template <typename T>
struct value_and_count
{
  T        value; // Accumulated value.
  uint64_t count; // Number of elements folded into `value`.

  // A bare value counts as a single element.
  __host__ __device__
  value_and_count(T const& value_)
    : value(value_), count(1)
  {}

  // Explicit value and element count.
  __host__ __device__
  value_and_count(T const& value_, uint64_t count_)
    : value(value_), count(count_)
  {}

  // Member-wise copy.
  __host__ __device__
  value_and_count(value_and_count const& rhs)
    : value(rhs.value), count(rhs.count)
  {}

  // Member-wise copy assignment.
  __host__ __device__
  value_and_count& operator=(value_and_count const& rhs)
  {
    count = rhs.count;
    value = rhs.value;
    return *this;
  }

  // Assigning a bare value resets the count to one element.
  __host__ __device__
  value_and_count& operator=(T const& value_)
  {
    count = 1;
    value = value_;
    return *this;
  }
};
// Binary reduction functor that applies `ReduceOp` to the values of its
// operands and keeps track of how many elements have been combined.
template <typename T, typename ReduceOp>
struct counting_op
{
private:
  ReduceOp reduce; // Underlying value reduction.

public:
  // Use a default-constructed `ReduceOp`.
  __host__ __device__
  counting_op() : reduce() {}

  // Wrap a copy of `reduce_`.
  __host__ __device__
  counting_op(ReduceOp const& reduce_) : reduce(reduce_) {}

  // Copy the wrapped reduction from `other`.
  __host__ __device__
  counting_op(counting_op const& other) : reduce(other.reduce) {}

  // Fold one more raw element `y` into the partial result `x`.
  __host__ __device__
  value_and_count<T> operator()(
      value_and_count<T> const& x
    , T const& y
  ) const
  {
    uint64_t const merged_count = x.count + 1;
    return value_and_count<T>(reduce(x.value, y), merged_count);
  }

  // Merge two partial results, adding their element counts.
  __host__ __device__
  value_and_count<T> operator()(
      value_and_count<T> const& x
    , value_and_count<T> const& y
  ) const
  {
    uint64_t const merged_count = x.count + y.count;
    return value_and_count<T>(reduce(x.value, y.value), merged_count);
  }
};
// Return the arithmetic mean of `[first, last)`: the sum of the elements
// (seeded with `init`) divided by the number of elements.
//
// The element count is the divisor, so the range must not be empty.
template <typename InputIt, typename T>
T arithmetic_mean(InputIt first, InputIt last, T init)
{
  typedef value_and_count<T> accumulator;

  // The seed contributes its value but does not count as an element.
  accumulator const total = thrust::reduce(
    first, last, accumulator(init, 0), counting_op<T, thrust::plus<T> >()
  );

  return total.value / total.count;
}
// Return the arithmetic mean of `[first, last)`, seeding the sum with a
// value-initialized element of the iterator's value type.
template <typename InputIt>
typename thrust::iterator_traits<InputIt>::value_type
arithmetic_mean(InputIt first, InputIt last)
{
  typedef typename thrust::iterator_traits<InputIt>::value_type value_type;

  value_type const zero = value_type();
  return arithmetic_mean(first, last, zero);
}
// Return the sample standard deviation of `[first, last)` around `average`:
// the square root of the summed squared differences divided by `count - 1`.
//
// `count - 1` is the divisor, so the range needs at least two elements for the
// result to be meaningful.
template <typename InputIt, typename T>
T sample_standard_deviation(InputIt first, InputIt last, T average)
{
  typedef value_and_count<T> accumulator;

  // Sum `(x - average) ^ 2` over the range while counting the elements.
  accumulator const squares = thrust::transform_reduce(
      first, last
    , squared_difference<T>(average)
    , accumulator(T(), 0)
    , counting_op<T, thrust::plus<T> >()
  );

  T const degrees_of_freedom = T(squares.count - 1);
  return std::sqrt(squares.value / degrees_of_freedom);
}
| /////////////////////////////////////////////////////////////////////////////// | |
| // Formulas for propagation of uncertainty from: | |
| // | |
| // https://en.wikipedia.org/wiki/Propagation_of_uncertainty#Example_formulas | |
| // | |
| // Even though it's Wikipedia, I trust it as I helped write that table. | |
| // | |
| // XXX Replace with a proper reference. | |
// Propagate uncertainty through a product or quotient of two uncertain
// values, `A +/- A_unc` and `B +/- B_unc`. Given `f = AB` or `f = A/B`, where
// `A != 0` and `B != 0`, the uncertainty in `f` is approximately:
//
//   f_unc = abs(f) * sqrt((A_unc / A) ^ 2 + (B_unc / B) ^ 2)
//
template <typename T>
__host__ __device__
T uncertainty_multiplicative(
    T const& f
  , T const& A, T const& A_unc
  , T const& B, T const& B_unc
)
{
  // Squared relative uncertainty of each operand.
  T const rel_A_squared = (A_unc / A) * (A_unc / A);
  T const rel_B_squared = (B_unc / B) * (B_unc / B);

  return std::abs(f) * std::sqrt(rel_A_squared + rel_B_squared);
}
// Propagate uncertainty through a weighted sum of two uncertain values,
// `A +/- A_unc` and `B +/- B_unc`. Given `f = cA + dB` (where `c` and `d` are
// certain constants), the uncertainty in `f` is approximately:
//
//   f_unc = sqrt(c ^ 2 * A_unc ^ 2 + d ^ 2 * B_unc ^ 2)
//
template <typename T>
__host__ __device__
T uncertainty_additive(
    T const& c, T const& A_unc
  , T const& d, T const& B_unc
)
{
  // Variance contributed by each term.
  T const var_A = c * c * A_unc * A_unc;
  T const var_B = d * d * B_unc * B_unc;

  return std::sqrt(var_A + var_B);
}
| /////////////////////////////////////////////////////////////////////////////// | |
// Return the position of the most significant digit of `x`, expressed as the
// number of digits after the decimal place to round to (negative numbers
// indicate rounding before the decimal place).
//
// Returns 0 for zero and for non-finite values (NaN, +/-infinity), which have
// no leading digit.
template <typename T>
int find_significant_digit(T x)
{
  if (x == T(0)) return 0;

  // `x - x` is zero for every finite value and NaN for NaN/infinity. Without
  // this guard the non-finite logarithm below would be cast to `int`, which
  // is undefined behavior.
  if (!(x - x == T(0))) return 0;

  return -int(std::floor(std::log10(std::abs(x))));
}
// Round `x` to `ndigits` digits after the decimal place, rounding halves away
// from zero. A negative `ndigits` rounds to the left of the decimal place.
template <typename T, typename N>
T round_to_precision(T x, N ndigits)
{
  // Work on the magnitude and restore the sign at the end.
  double sign = 1.0;
  if (x < 0.0) sign = -1.0;

  double const scale            = std::pow(T(10.0), ndigits);
  double const rounded_at_scale = std::floor(x * sign * scale + 0.5);

  return (rounded_at_scale / scale) * sign;
}
| /////////////////////////////////////////////////////////////////////////////// | |
// Print the two CSV header rows to `std::cout`: the column names, followed by
// the unit of each column (empty for unitless columns).
void print_experiment_header()
{ // {{{
  // One `{name, unit}` pair per CSV column, in output order.
  static char const* const columns[][2] =
  {
      { "Thrust Version",                "" }
    , { "Algorithm",                     "" }
    , { "Element Type",                  "" }
    , { "Element Size",                  "bits/element" }
    , { "Elements per Trial",            "elements" }
    , { "Total Input Size",              "MiBs" }
    , { "STL Trials",                    "trials" }
    , { "STL Average Walltime",          "secs" }
    , { "STL Walltime Uncertainty",      "secs" }
    , { "STL Average Throughput",        "elements/sec" }
    , { "STL Throughput Uncertainty",    "elements/sec" }
    , { "Thrust Trials",                 "trials" }
    , { "Thrust Average Walltime",       "secs" }
    , { "Thrust Walltime Uncertainty",   "secs" }
    , { "Thrust Average Throughput",     "elements/sec" }
    , { "Thrust Throughput Uncertainty", "elements/sec" }
    , { "TBB Trials",                    "trials" }
    , { "TBB Average Walltime",          "secs" }
    , { "TBB Walltime Uncertainty",      "secs" }
    , { "TBB Average Throughput",        "elements/sec" }
    , { "TBB Throughput Uncertainty",    "elements/sec" }
  };

  unsigned const ncolumns = sizeof(columns) / sizeof(columns[0]);

  // Row 0 holds the names, row 1 the units.
  for (unsigned row = 0; row < 2; ++row)
  {
    for (unsigned column = 0; column < ncolumns; ++column)
    {
      if (column != 0) std::cout << ",";
      std::cout << columns[column][row];
    }

    std::cout << std::endl;
  }
} // }}}
| /////////////////////////////////////////////////////////////////////////////// | |
// Immutable summary of the timings collected for one experiment.
struct experiment_results
{
  double const average_time; // Arithmetic mean of trial times in seconds.
  double const stdev_time;   // Sample standard deviation of trial times.

  // Store the mean and the standard deviation of the trial times.
  experiment_results(double average_time_, double stdev_time_)
    : average_time(average_time_)
    , stdev_time(stdev_time_)
  {}
};
| /////////////////////////////////////////////////////////////////////////////// | |
// Runs one benchmark (`Test`) for one element type with the STL, Thrust and
// TBB implementations and prints a single CSV row with the results.
template <
    template <typename> class Test
  , typename ElementMetaType // Has an embedded typedef `type`,
                             // and a static method `name` that
                             // returns a char const*.
  , uint64_t Elements
  , uint64_t BaselineTrials
  , uint64_t RegularTrials
>
struct experiment_driver
{
  typedef typename ElementMetaType::type element_type;

  static char const* const test_name;
  static char const* const element_type_name; // Element type name as a string.

  static uint64_t const elements;        // # of elements per trial.
  static uint64_t const element_size;    // Size of each element in bits.
  static double const input_size;        // `elements` * `element_size` in MiB.
  static uint64_t const baseline_trials; // # of baseline trials per experiment.
  static uint64_t const regular_trials;  // # of regular trials per experiment.

  // Run the STL, Thrust and TBB experiments (in that order) and print one CSV
  // row to `std::cout` whose columns match `print_experiment_header`.
  static void run_experiment()
  { // {{{
    reported_statistics const stl_stats    = summarize(std_experiment());
    reported_statistics const thrust_stats = summarize(thrust_experiment());
    reported_statistics const tbb_stats    = summarize(tbb_experiment());

    std::cout << THRUST_VERSION                             // Thrust Version.
              << "," << test_name                           // Algorithm.
              << "," << element_type_name                   // Element Type.
              << "," << element_size                        // Element Size.
              << "," << elements                            // Elements per Trial.
              << "," << input_size                          // Total Input Size.
              << "," << baseline_trials                     // STL Trials.
              << "," << stl_stats.walltime                  // STL Average Walltime.
              << "," << stl_stats.walltime_uncertainty      // STL Walltime Uncertainty.
              << "," << stl_stats.throughput                // STL Average Throughput.
              << "," << stl_stats.throughput_uncertainty    // STL Throughput Uncertainty.
              << "," << regular_trials                      // Thrust Trials.
              << "," << thrust_stats.walltime               // Thrust Average Walltime.
              << "," << thrust_stats.walltime_uncertainty   // Thrust Walltime Uncertainty.
              << "," << thrust_stats.throughput             // Thrust Average Throughput.
              << "," << thrust_stats.throughput_uncertainty // Thrust Throughput Uncertainty.
              << "," << regular_trials                      // TBB Trials.
              << "," << tbb_stats.walltime                  // TBB Average Walltime.
              << "," << tbb_stats.walltime_uncertainty      // TBB Walltime Uncertainty.
              << "," << tbb_stats.throughput                // TBB Average Throughput.
              << "," << tbb_stats.throughput_uncertainty    // TBB Throughput Uncertainty.
              << std::endl;
  } // }}}

private:
  // The four rounded figures reported for one implementation.
  struct reported_statistics
  {
    double walltime;               // Average walltime in seconds.
    double walltime_uncertainty;   // Standard deviation of the walltime.
    double throughput;             // Average throughput in elements/sec.
    double throughput_uncertainty; // Propagated throughput uncertainty.
  };

  // Derive throughput and its uncertainty from the raw timings in `results`,
  // then round every reported figure.
  static reported_statistics summarize(experiment_results const& results)
  { // {{{
    double const throughput = elements / results.average_time;

    // The element count is exact, so only the walltime contributes
    // uncertainty to the throughput.
    double const throughput_uncertainty = uncertainty_multiplicative(
        throughput
      , double(elements), 0.0
      , results.average_time, results.stdev_time
    );

    // Each value/uncertainty pair is rounded to a common number of digits:
    // the larger of the two significant-digit positions.
    int const walltime_precision = std::max(
        find_significant_digit(results.average_time)
      , find_significant_digit(results.stdev_time)
    );

    int const throughput_precision = std::max(
        find_significant_digit(throughput)
      , find_significant_digit(throughput_uncertainty)
    );

    reported_statistics stats;

    stats.walltime
      = round_to_precision(results.average_time, walltime_precision);
    stats.walltime_uncertainty
      = round_to_precision(results.stdev_time, walltime_precision);
    stats.throughput
      = round_to_precision(throughput, throughput_precision);
    stats.throughput_uncertainty
      = round_to_precision(throughput_uncertainty, throughput_precision);

    return stats;
  } // }}}

  // Time the STL implementation of `Test`.
  static experiment_results std_experiment()
  {
    return experiment<typename Test<element_type>::std_trial>();
  }

  // Time the Thrust implementation of `Test`.
  static experiment_results thrust_experiment()
  {
    return experiment<typename Test<element_type>::thrust_trial>();
  }

  // Time the TBB implementation of `Test`.
  static experiment_results tbb_experiment()
  {
    return experiment<typename Test<element_type>::tbb_trial>();
  }

  // Run one untimed warmup trial followed by `baseline_trials` or
  // `regular_trials` timed trials of `Trial` (depending on
  // `Trial::is_baseline()`), and return the mean and sample standard
  // deviation of the timed trials.
  template <typename Trial>
  static experiment_results experiment()
  { // {{{
    Trial trial;

    // Allocate storage and generate random input for the warmup trial.
    trial.setup(elements);

    // Warmup trial.
    trial();

    uint64_t const trials
      = trial.is_baseline() ? baseline_trials : regular_trials;

    std::vector<double> times;
    times.reserve(trials);

    for (uint64_t t = 0; t < trials; ++t)
    {
      // Generate random input for next trial; this is not timed.
      trial.setup(elements);

      steady_timer e;

      // Benchmark.
      e.start();
      trial();
      e.stop();

      times.push_back(e.seconds_elapsed());
    }

    double average_time
      = arithmetic_mean(times.begin(), times.end());

    double stdev_time
      = sample_standard_deviation(times.begin(), times.end(), average_time);

    return experiment_results(average_time, stdev_time);
  } // }}}
};
// Out-of-class definitions of the static data members of `experiment_driver`.

// Benchmark name, as reported by the tester.
template <
    template <typename> class Test, typename ElementMetaType
  , uint64_t Elements, uint64_t BaselineTrials, uint64_t RegularTrials
>
char const* const
experiment_driver<
  Test, ElementMetaType, Elements, BaselineTrials, RegularTrials
>::test_name
  = Test<typename ElementMetaType::type>::test_name();

// Element type name, as reported by the meta type.
template <
    template <typename> class Test, typename ElementMetaType
  , uint64_t Elements, uint64_t BaselineTrials, uint64_t RegularTrials
>
char const* const
experiment_driver<
  Test, ElementMetaType, Elements, BaselineTrials, RegularTrials
>::element_type_name
  = ElementMetaType::name();

// Size of one element in bits.
template <
    template <typename> class Test, typename ElementMetaType
  , uint64_t Elements, uint64_t BaselineTrials, uint64_t RegularTrials
>
uint64_t const
experiment_driver<
  Test, ElementMetaType, Elements, BaselineTrials, RegularTrials
>::element_size
  = CHAR_BIT * sizeof(typename ElementMetaType::type);

// Number of elements processed by each trial.
template <
    template <typename> class Test, typename ElementMetaType
  , uint64_t Elements, uint64_t BaselineTrials, uint64_t RegularTrials
>
uint64_t const
experiment_driver<
  Test, ElementMetaType, Elements, BaselineTrials, RegularTrials
>::elements
  = Elements;

// Total input size of one trial in MiB.
template <
    template <typename> class Test, typename ElementMetaType
  , uint64_t Elements, uint64_t BaselineTrials, uint64_t RegularTrials
>
double const
experiment_driver<
  Test, ElementMetaType, Elements, BaselineTrials, RegularTrials
>::input_size
  = double(
      Elements                                 /* [elements]      */
    * sizeof(typename ElementMetaType::type)   /* [bytes/element] */
    )
  / double(1024 * 1024                         /* [bytes/MiB]     */);

// Number of timed trials for baseline implementations.
template <
    template <typename> class Test, typename ElementMetaType
  , uint64_t Elements, uint64_t BaselineTrials, uint64_t RegularTrials
>
uint64_t const
experiment_driver<
  Test, ElementMetaType, Elements, BaselineTrials, RegularTrials
>::baseline_trials
  = BaselineTrials;

// Number of timed trials for regular (non-baseline) implementations.
template <
    template <typename> class Test, typename ElementMetaType
  , uint64_t Elements, uint64_t BaselineTrials, uint64_t RegularTrials
>
uint64_t const
experiment_driver<
  Test, ElementMetaType, Elements, BaselineTrials, RegularTrials
>::regular_trials
  = RegularTrials;
| /////////////////////////////////////////////////////////////////////////////// | |
| // Never create variables, pointers or references of any of the `*_trial_base` | |
| // classes. They are purely mixin base classes and do not have vtables and | |
| // virtual destructors. Using them for polymorphism instead of composition will | |
| // probably cause slicing. | |
// Tag types selecting which trial count an experiment uses for a trial.
struct baseline_trial {};
struct regular_trial {};

// Mixin that reports, through `is_baseline()`, which tag a trial was declared
// with. Only the two specializations below are defined.
template <typename TrialKind = regular_trial>
struct trial_base;

// Baseline trials.
template <>
struct trial_base<baseline_trial>
{
  static bool is_baseline()
  {
    return true;
  }
};

// Regular trials (the default).
template <>
struct trial_base<regular_trial>
{
  static bool is_baseline()
  {
    return false;
  }
};
// Mixin for trials that operate in place on a single `input` container.
template <typename Container, typename TrialKind = regular_trial>
struct inplace_trial_base : trial_base<TrialKind>
{
  Container input; // Data the trial reads and overwrites.

  // Size `input` to `elements` and refill it via `randomize`.
  void setup(uint64_t elements)
  {
    this->input.resize(elements);

    randomize(this->input);
  }
};
// Mixin for trials that read from `input` and write to a separate `output`
// container of the same size.
template <typename Container, typename TrialKind = regular_trial>
struct copy_trial_base : trial_base<TrialKind>
{
  Container input;  // Source data.
  Container output; // Destination storage.

  // Size both containers to `elements` and refill `input` via `randomize`;
  // `output` is only resized.
  void setup(uint64_t elements)
  {
    this->input.resize(elements);
    this->output.resize(elements);

    randomize(this->input);
  }
};
// Mixin for shuffle trials, which permute a single `input` container.
template <typename Container, typename TrialKind = regular_trial>
struct shuffle_trial_base : trial_base<TrialKind>
{
  Container input; // Data the trial shuffles.

  // Size `input` to `elements` and refill it via `randomize`.
  void setup(uint64_t elements)
  {
    this->input.resize(elements);

    randomize(this->input);
  }
};
| /////////////////////////////////////////////////////////////////////////////// | |
// Reduction benchmark: sums the input with `std::accumulate` (baseline),
// `thrust::reduce` and `tbb_reduce`.
template <typename T>
struct reduce_tester
{
  static char const* test_name()
  {
    return "reduce";
  }

  struct std_trial : inplace_trial_base<std::vector<T>, baseline_trial>
  {
    void operator()()
    {
      T const total
        = std::accumulate(this->input.begin(), this->input.end(), T(0));

      // Consume the result to prevent the optimizer from removing the body.
      if (total == 0)
        std::cout << "xyz";
    }
  };

  struct thrust_trial : inplace_trial_base<thrust::device_vector<T> >
  {
    // The reduction result is discarded.
    void operator()()
    {
      thrust::reduce(this->input.begin(), this->input.end());
    }
  };

  struct tbb_trial : inplace_trial_base<std::vector<T> >
  {
    void operator()()
    {
      tbb_reduce(this->input);
    }
  };
};
// Sort benchmark: sorts the input in place with `std::sort` (baseline),
// `thrust::sort` and `tbb_sort`.
template <typename T>
struct sort_tester
{
  static char const* test_name() { return "sort"; }

  struct std_trial : inplace_trial_base<std::vector<T>, baseline_trial>
  {
    void operator()()
    {
      std::sort(this->input.begin(), this->input.end());
    }
  };

  struct thrust_trial : inplace_trial_base<thrust::device_vector<T> >
  {
    void operator()()
    {
      thrust::sort(this->input.begin(), this->input.end());

      // Wait for the device before returning so the caller's timer covers the
      // whole sort, and surface any CUDA error reported by the sync.
      cudaError_t err = cudaDeviceSynchronize();
      if (err != cudaSuccess)
        throw thrust::error_code(err, thrust::cuda_category());
    }
  };

  struct tbb_trial : inplace_trial_base<std::vector<T> >
  {
    void operator()()
    {
      tbb_sort(this->input);
    }
  }; // The terminating `;` was missing here, which is a syntax error.
};
// In-place transform benchmark: negates every element with `std::transform`
// (baseline) and `thrust::transform`, and runs `tbb_transform` for TBB.
template <typename T>
struct transform_inplace_tester
{
  static char const* test_name()
  {
    return "transform_inplace";
  }

  struct std_trial : inplace_trial_base<std::vector<T>, baseline_trial>
  {
    void operator()()
    {
      thrust::negate<T> const negate_op = thrust::negate<T>();

      std::transform(
        this->input.begin(), this->input.end(), this->input.begin(), negate_op
      );
    }
  };

  struct thrust_trial : inplace_trial_base<thrust::device_vector<T> >
  {
    void operator()()
    {
      thrust::negate<T> const negate_op = thrust::negate<T>();

      thrust::transform(
        this->input.begin(), this->input.end(), this->input.begin(), negate_op
      );

      // Wait for the device and surface any CUDA error from the sync.
      cudaError_t const status = cudaDeviceSynchronize();
      if (cudaSuccess != status)
        throw thrust::error_code(status, thrust::cuda_category());
    }
  };

  struct tbb_trial : inplace_trial_base<std::vector<T> >
  {
    void operator()()
    {
      tbb_transform(this->input);
    }
  };
};
// In-place inclusive scan benchmark: `std::partial_sum` (baseline),
// `thrust::inclusive_scan` and `tbb_scan`.
template <typename T>
struct inclusive_scan_inplace_tester
{
  static char const* test_name()
  {
    return "inclusive_scan_inplace";
  }

  struct std_trial : inplace_trial_base<std::vector<T>, baseline_trial>
  {
    // The scan writes its result back over the input.
    void operator()()
    {
      std::partial_sum(
        this->input.begin(), this->input.end(), this->input.begin()
      );
    }
  };

  struct thrust_trial : inplace_trial_base<thrust::device_vector<T> >
  {
    // The scan writes its result back over the input.
    void operator()()
    {
      thrust::inclusive_scan(
        this->input.begin(), this->input.end(), this->input.begin()
      );

      // Wait for the device and surface any CUDA error from the sync.
      cudaError_t const status = cudaDeviceSynchronize();
      if (cudaSuccess != status)
        throw thrust::error_code(status, thrust::cuda_category());
    }
  };

  struct tbb_trial : inplace_trial_base<std::vector<T> >
  {
    void operator()()
    {
      tbb_scan(this->input);
    }
  };
};
// Copy benchmark: copies `input` into `output` with `std::copy` (baseline),
// `thrust::copy` and `tbb_copy`.
template <typename T>
struct copy_tester
{
  static char const* test_name() { return "copy"; }

  // Tagged `baseline_trial` like the `std_trial` of every other tester, so
  // that the number of trials actually run matches the `baseline_trials`
  // value the driver reports in the "STL Trials" column.
  struct std_trial : copy_trial_base<std::vector<T>, baseline_trial>
  {
    void operator()()
    {
      std::copy(this->input.begin(), this->input.end(), this->output.begin());
    }
  };

  struct thrust_trial : copy_trial_base<thrust::device_vector<T> >
  {
    void operator()()
    {
      // Copy into `output`, as the STL and TBB trials do; the destination
      // used to be `input` itself, which left `output` unused.
      thrust::copy(this->input.begin(), this->input.end(), this->output.begin());

      // Wait for the device and surface any CUDA error from the sync.
      cudaError_t err = cudaDeviceSynchronize();
      if (err != cudaSuccess)
        throw thrust::error_code(err, thrust::cuda_category());
    }
  };

  struct tbb_trial : copy_trial_base<std::vector<T> >
  {
    void operator()()
    {
      tbb_copy(this->input, this->output);
    }
  };
};
// Shuffle benchmark: permutes the input with `std::shuffle` (baseline) and
// `thrust::shuffle`. Each trial owns a default-constructed random engine.
//
// NOTE(review): no `tbb_trial` is defined here, while `experiment_driver`
// refers to `Test<T>::tbb_trial` - confirm how this tester is meant to be
// used with that driver.
template <typename T>
struct shuffle_tester
{
  static char const* test_name()
  {
    return "shuffle";
  }

  struct std_trial : shuffle_trial_base<std::vector<T>, baseline_trial>
  {
    std::default_random_engine g; // Host-side random engine.

    void operator()()
    {
      std::shuffle(this->input.begin(), this->input.end(), this->g);
    }
  };

  struct thrust_trial : shuffle_trial_base<thrust::device_vector<T> >
  {
    thrust::default_random_engine g; // Thrust random engine.

    void operator()()
    {
      thrust::shuffle(this->input.begin(), this->input.end(), this->g);

      // Wait for the device and surface any CUDA error from the sync.
      cudaError_t const status = cudaDeviceSynchronize();
      if (cudaSuccess != status)
        throw thrust::error_code(status, thrust::cuda_category());
    }
  };
};
| /////////////////////////////////////////////////////////////////////////////// | |
// Run every core primitive experiment (reduce, transform, inclusive scan,
// sort, copy, shuffle - in that order) for one element type.
//
// `Elements` is divided by the element size for every experiment except sort.
template <
    typename ElementMetaType
  , uint64_t Elements
  , uint64_t BaselineTrials
  , uint64_t RegularTrials
>
void run_core_primitives_experiments_for_type()
{
  typedef typename ElementMetaType::type element_type;

  // Element count that keeps the memory footprint independent of the type.
  uint64_t const footprint_elements = Elements / sizeof(element_type);

  // Sorting is more sensitive to element count than memory footprint.
  uint64_t const sort_elements = (Elements >> 6);

  experiment_driver<
    reduce_tester, ElementMetaType
  , footprint_elements, BaselineTrials, RegularTrials
  >::run_experiment();

  experiment_driver<
    transform_inplace_tester, ElementMetaType
  , footprint_elements, BaselineTrials, RegularTrials
  >::run_experiment();

  experiment_driver<
    inclusive_scan_inplace_tester, ElementMetaType
  , footprint_elements, BaselineTrials, RegularTrials
  >::run_experiment();

  experiment_driver<
    sort_tester, ElementMetaType
  , sort_elements, BaselineTrials, RegularTrials
  >::run_experiment();

  experiment_driver<
    copy_tester, ElementMetaType
  , footprint_elements, BaselineTrials, RegularTrials
  >::run_experiment();

  experiment_driver<
    shuffle_tester, ElementMetaType
  , footprint_elements, BaselineTrials, RegularTrials
  >::run_experiment();
}
| /////////////////////////////////////////////////////////////////////////////// | |
// One meta type per benchmarked element type. The macro is defined outside
// this section; presumably it produces the `<type>_meta` types named in
// `run_core_primitives_experiments` - confirm against its definition.
DEFINE_ELEMENT_META_TYPE(char);
DEFINE_ELEMENT_META_TYPE(int);
DEFINE_ELEMENT_META_TYPE(int8_t);
DEFINE_ELEMENT_META_TYPE(int16_t);
DEFINE_ELEMENT_META_TYPE(int32_t);
DEFINE_ELEMENT_META_TYPE(int64_t);
DEFINE_ELEMENT_META_TYPE(float);
DEFINE_ELEMENT_META_TYPE(double);
| /////////////////////////////////////////////////////////////////////////////// | |
// Run the core primitive experiments for every benchmarked element type, in
// the order: char, int, int8_t, int16_t, int32_t, int64_t, float, double.
template <
    uint64_t Elements
  , uint64_t BaselineTrials
  , uint64_t RegularTrials
>
void run_core_primitives_experiments()
{
  // Character and native integer types.
  run_core_primitives_experiments_for_type<
    char_meta,    Elements, BaselineTrials, RegularTrials>();
  run_core_primitives_experiments_for_type<
    int_meta,     Elements, BaselineTrials, RegularTrials>();

  // Fixed-width integer types.
  run_core_primitives_experiments_for_type<
    int8_t_meta,  Elements, BaselineTrials, RegularTrials>();
  run_core_primitives_experiments_for_type<
    int16_t_meta, Elements, BaselineTrials, RegularTrials>();
  run_core_primitives_experiments_for_type<
    int32_t_meta, Elements, BaselineTrials, RegularTrials>();
  run_core_primitives_experiments_for_type<
    int64_t_meta, Elements, BaselineTrials, RegularTrials>();

  // Floating point types.
  run_core_primitives_experiments_for_type<
    float_meta,   Elements, BaselineTrials, RegularTrials>();
  run_core_primitives_experiments_for_type<
    double_meta,  Elements, BaselineTrials, RegularTrials>();
}
| /////////////////////////////////////////////////////////////////////////////// | |
| // XXX Use `std::string_view` when possible. | |
// Split `str` at every occurrence of `delim` and return the non-empty pieces
// in order. Empty tokens (from leading, trailing or adjacent delimiters) are
// dropped.
//
// An empty `delim` is treated as "no delimiter": the result is `str` itself
// (or nothing, if `str` is empty).
std::vector<std::string> split(std::string const& str, std::string const& delim)
{
  std::vector<std::string> tokens;

  // An empty delimiter matches at every position without consuming any
  // input, so the scan below would never advance.
  if (delim.empty())
  {
    if (!str.empty()) tokens.push_back(str);
    return tokens;
  }

  std::string::size_type prev = 0;

  while (prev < str.length())
  {
    std::string::size_type pos = str.find(delim, prev);

    // No further delimiter: the final token runs to the end of the string.
    if (pos == std::string::npos) pos = str.length();

    if (pos > prev) tokens.push_back(str.substr(prev, pos - prev));

    prev = pos + delim.length();
  }

  return tokens;
}
| /////////////////////////////////////////////////////////////////////////////// | |
| struct command_line_option_error : std::exception | |
| { | |
| virtual ~command_line_option_error() NOEXCEPT {} | |
| virtual const char* what() const NOEXCEPT = 0; | |
| }; | |
| struct only_one_option_allowed : command_line_option_error | |
| { | |
| // Construct a new `only_one_option_allowed` exception. `key` is the | |
| // option name and `[first, last)` is a sequence of | |
| // `std::pair<std::string const, std::string>`s (the values). | |
| template <typename InputIt> | |
| only_one_option_allowed(std::string const& key, InputIt first, InputIt last) | |
| : message() | |
| { | |
| message = "Only one `--"; | |
| message += key; | |
| message += "` option is allowed, but multiple were received: "; | |
| for (; first != last; ++first) | |
| { | |
| message += "`"; | |
| message += (*first).second; | |
| message += "` "; | |
| } | |
| // Remove the trailing space added by the last iteration of the above loop. | |
| message.erase(message.size() - 1, 1); | |
| message += "."; | |
| } | |
| virtual ~only_one_option_allowed() NOEXCEPT {} | |
| virtual const char* what() const NOEXCEPT | |
| { | |
| return message.c_str(); | |
| } | |
| private: | |
| std::string message; | |
| }; | |
| struct required_option_missing : command_line_option_error | |
| { | |
| // Construct a new `requirement_option_missing` exception. `key` is the | |
| // option name. | |
| required_option_missing(std::string const& key) | |
| : message() | |
| { | |
| message = "`--"; | |
| message += key; | |
| message += "` option is required."; | |
| } | |
| virtual ~required_option_missing() NOEXCEPT {} | |
| virtual const char* what() const NOEXCEPT | |
| { | |
| return message.c_str(); | |
| } | |
| private: | |
| std::string message; | |
| }; | |
| struct command_line_processor | |
| { | |
| typedef std::vector<std::string> positional_options_type; | |
| typedef std::multimap<std::string, std::string> keyword_options_type; | |
| typedef std::pair< | |
| keyword_options_type::const_iterator | |
| , keyword_options_type::const_iterator | |
| > keyword_option_values; | |
| command_line_processor(int argc, char** argv) | |
| : pos_args(), kw_args() | |
| { // {{{ | |
| for (int i = 1; i < argc; ++i) | |
| { | |
| std::string arg(argv[i]); | |
| // Look for --key or --key=value options. | |
| if (arg.substr(0, 2) == "--") | |
| { | |
| std::string::size_type n = arg.find('=', 2); | |
| keyword_options_type::value_type key_value; | |
| if (n == std::string::npos) // --key | |
| kw_args.insert(keyword_options_type::value_type( | |
| arg.substr(2), "" | |
| )); | |
| else // --key=value | |
| kw_args.insert(keyword_options_type::value_type( | |
| arg.substr(2, n - 2), arg.substr(n + 1) | |
| )); | |
| kw_args.insert(key_value); | |
| } | |
| else // Assume it's positional. | |
| pos_args.push_back(arg); | |
| } | |
| } // }}} | |
| // Return the value for option `key`. | |
| // | |
| // Throws: | |
| // * `only_one_option_allowed` if there is more than one value for `key`. | |
| // * `required_option_missing` if there is no value for `key`. | |
| std::string operator()(std::string const& key) const | |
| { | |
| keyword_option_values v = kw_args.equal_range(key); | |
| keyword_options_type::difference_type d = std::distance(v.first, v.second); | |
| if (1 < d) // Too many options. | |
| throw only_one_option_allowed(key, v.first, v.second); | |
| else if (0 == d) // No option. | |
| throw required_option_missing(key); | |
| return (*v.first).second; | |
| } | |
| // Return the value for option `key`, or `dflt` if `key` has no value. | |
| // | |
| // Throws: `only_one_option_allowed` if there is more than one value for `key`. | |
| std::string operator()(std::string const& key, std::string const& dflt) const | |
| { | |
| keyword_option_values v = kw_args.equal_range(key); | |
| keyword_options_type::difference_type d = std::distance(v.first, v.second); | |
| if (1 < d) // Too many options. | |
| throw only_one_option_allowed(key, v.first, v.second); | |
| if (0 == d) // No option. | |
| return dflt; | |
| else // 1 option. | |
| return (*v.first).second; | |
| } | |
| // Returns `true` if the option `key` was specified at least once. | |
| bool has(std::string const& key) const | |
| { | |
| return kw_args.count(key) > 0; | |
| } | |
| private: | |
| positional_options_type pos_args; | |
| keyword_options_type kw_args; | |
| }; | |
| /////////////////////////////////////////////////////////////////////////////// | |
// Benchmark entry point.
//
// Options:
// * `--device=N`: CUDA device to run on (`0` by default).
// * `--no-header`: do not print the CSV header rows.
//
// Returns 0 on success and 1 if the command line is invalid or the CUDA
// device cannot be selected.
int main(int argc, char** argv)
{
  try
  {
    command_line_processor clp(argc, argv);

    tbb::task_scheduler_init init;

    test_tbb();

    // Set the CUDA device to use for the benchmark - `0` by default.
    // `std::atoi` returns 0 if the conversion fails.
    int device = std::atoi(clp("device", "0").c_str());

    // Stop here if the device cannot be selected, rather than letting every
    // later CUDA call fail for a reason that is no longer visible.
    cudaError_t err = cudaSetDevice(device);
    if (err != cudaSuccess)
    {
      std::cerr << "Failed to select CUDA device " << device << ": "
                << cudaGetErrorString(err) << std::endl;
      return 1;
    }

    if (!clp.has("no-header"))
      print_experiment_header();

    /*                                Elements       | Trials             */
    /*                                               | Baseline | Regular */
    //run_core_primitives_experiments< 1LLU << 21LLU , 4        , 16      >();
    //run_core_primitives_experiments< 1LLU << 22LLU , 4        , 16      >();
    //run_core_primitives_experiments< 1LLU << 23LLU , 4        , 16      >();
    //run_core_primitives_experiments< 1LLU << 24LLU , 4        , 16      >();
    //run_core_primitives_experiments< 1LLU << 25LLU , 4        , 16      >();
    run_core_primitives_experiments< 1LLU << 26LLU   , 4        , 16      >();
    run_core_primitives_experiments< 1LLU << 27LLU   , 4        , 16      >();
    //run_core_primitives_experiments< 1LLU << 28LLU , 4        , 16      >();
    //run_core_primitives_experiments< 1LLU << 29LLU , 4        , 16      >();
  }
  catch (command_line_option_error const& e)
  {
    // E.g. `--device` was given more than once.
    std::cerr << e.what() << std::endl;
    return 1;
  }

  return 0;
}
| // TODO: Add different input sizes and half precision | |