File size: 2,341 Bytes
be11144
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
#include <unittest/unittest.h>
#include <thrust/tuple.h>
#include <thrust/scan.h>
#include <thrust/transform.h>

#if THRUST_DEVICE_SYSTEM == THRUST_DEVICE_SYSTEM_CUDA
#include <unittest/cuda/testframework.h>
#endif

using namespace unittest;

// Binary functor that adds two two-element tuples component-wise.
//
// The call operator is const because the functor carries no state; this lets
// it be invoked through a const object or a const reference.
struct SumTupleFunctor
{
  // lhs, rhs: tuples of the same type whose elements 0 and 1 support operator+.
  // Returns a Tuple built from (get<0>(lhs) + get<0>(rhs),
  //                             get<1>(lhs) + get<1>(rhs)).
  // The sums are converted back to Tuple's own element types on return.
  template <typename Tuple>
  __host__ __device__
  Tuple operator()(const Tuple &lhs, const Tuple &rhs) const
  {
    using thrust::get;

    return thrust::make_tuple(get<0>(lhs) + get<0>(rhs),
                              get<1>(lhs) + get<1>(rhs));
  }
};

// Binary functor that packs its two arguments into a thrust::tuple<T1,T2>.
//
// Arguments are taken by const reference so that const lvalues and temporaries
// are accepted as well as mutable lvalues, and the call operator is const
// because the functor carries no state.
struct MakeTupleFunctor
{
  // lhs: value stored as element 0 of the result.
  // rhs: value stored as element 1 of the result.
  // Returns thrust::make_tuple(lhs, rhs) as a thrust::tuple<T1,T2>.
  template<typename T1, typename T2>
  __host__ __device__
  thrust::tuple<T1,T2> operator()(const T1 &lhs, const T2 &rhs) const
  {
    return thrust::make_tuple(lhs, rhs);
  }
};


template <typename T>
struct TestTupleScan
{
  void operator()(const size_t n)
  {
     using namespace thrust;

     host_vector<T> h_t1 = unittest::random_integers<T>(n);
     host_vector<T> h_t2 = unittest::random_integers<T>(n);

     // initialize input
     host_vector< tuple<T,T> > h_input(n);
     transform(h_t1.begin(), h_t1.end(), h_t2.begin(), h_input.begin(), MakeTupleFunctor());
     device_vector< tuple<T,T> > d_input = h_input;
     
     // allocate output
     tuple<T,T> zero(0,0);
     host_vector  < tuple<T,T> > h_output(n, zero);
     device_vector< tuple<T,T> > d_output(n, zero);

     // inclusive_scan
     inclusive_scan(h_input.begin(), h_input.end(), h_output.begin(), SumTupleFunctor());
     inclusive_scan(d_input.begin(), d_input.end(), d_output.begin(), SumTupleFunctor());
     ASSERT_EQUAL_QUIET(h_output, d_output);

    // The tests below get miscompiled on Tesla hw for 8b types

#if THRUST_DEVICE_SYSTEM == THRUST_DEVICE_SYSTEM_CUDA
    if(const CUDATestDriver *driver = dynamic_cast<const CUDATestDriver*>(&UnitTestDriver::s_driver()))
    {
      if(sizeof(T) == sizeof(unittest::uint8_t) && driver->current_device_architecture() < 200)
      {
        KNOWN_FAILURE;
      } // end if
    } // end if
#endif

     // exclusive_scan
     tuple<T,T> init(13,17);
     exclusive_scan(h_input.begin(), h_input.end(), h_output.begin(), init, SumTupleFunctor());
     exclusive_scan(d_input.begin(), d_input.end(), d_output.begin(), init, SumTupleFunctor());

     ASSERT_EQUAL_QUIET(h_output, d_output);
  }
};
VariableUnitTest<TestTupleScan, IntegralTypes> TestTupleScanInstance;