File size: 3,969 Bytes
be11144
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
#include <unittest/unittest.h>
#include <thrust/iterator/zip_iterator.h>
#include <thrust/reduce.h>

#if THRUST_DEVICE_SYSTEM == THRUST_DEVICE_SYSTEM_CUDA
#include <unittest/cuda/testframework.h>
#endif

using namespace unittest;

// Binary functor that adds two tuples component-wise: element 0 of the result
// is the sum of the operands' element 0, and likewise for element 1. The pair
// of sums is converted back to Tuple on return.
template<typename Tuple>
struct TuplePlus
{
  __host__ __device__
  Tuple operator()(Tuple x, Tuple y) const
  {
    return thrust::make_tuple(thrust::get<0>(x) + thrust::get<0>(y),
                              thrust::get<1>(x) + thrust::get<1>(y));
  }
}; // end TuplePlus


template <typename T>
struct TestZipIteratorReduceByKey
{
  void operator()(const size_t n)
  {
    using namespace thrust;

    host_vector<T> h_data0 = unittest::random_integers<bool>(n);
    host_vector<T> h_data1 = unittest::random_integers<T>(n);
    host_vector<T> h_data2 = unittest::random_integers<T>(n);

    device_vector<T> d_data0 = h_data0;
    device_vector<T> d_data1 = h_data1;
    device_vector<T> d_data2 = h_data2;

    typedef tuple<T,T> Tuple;

    // integer key, tuple value
    {
      host_vector<T> h_data3(n,0);
      host_vector<T> h_data4(n,0);
      host_vector<T> h_data5(n,0);
      device_vector<T> d_data3(n,0);
      device_vector<T> d_data4(n,0);
      device_vector<T> d_data5(n,0);

      // run on host
      reduce_by_key
          ( h_data0.begin(), h_data0.end(),
            make_zip_iterator(make_tuple(h_data1.begin(), h_data2.begin())),
            h_data3.begin(),
            make_zip_iterator(make_tuple(h_data4.begin(), h_data5.begin())),
            equal_to<T>(),
            TuplePlus<Tuple>());

      // run on device
      reduce_by_key
          ( d_data0.begin(), d_data0.end(),
            make_zip_iterator(make_tuple(d_data1.begin(), d_data2.begin())),
            d_data3.begin(),
            make_zip_iterator(make_tuple(d_data4.begin(), d_data5.begin())),
            equal_to<T>(),
            TuplePlus<Tuple>());

      ASSERT_EQUAL(h_data3, d_data3);
      ASSERT_EQUAL(h_data4, d_data4);
      ASSERT_EQUAL(h_data5, d_data5);
    }
    
    // The tests below get miscompiled on Tesla hw for 8b types

#if THRUST_DEVICE_SYSTEM == THRUST_DEVICE_SYSTEM_CUDA
    if(const CUDATestDriver *driver = dynamic_cast<const CUDATestDriver*>(&UnitTestDriver::s_driver()))
    {
      if(typeid(T) == typeid(unittest::uint8_t) && driver->current_device_architecture() < 200)
      {
        KNOWN_FAILURE;
      } // end if
    } // end if
#endif

    // tuple key, tuple value
    {
      host_vector<T> h_data3(n,0);
      host_vector<T> h_data4(n,0);
      host_vector<T> h_data5(n,0);
      host_vector<T> h_data6(n,0);
      device_vector<T> d_data3(n,0);
      device_vector<T> d_data4(n,0);
      device_vector<T> d_data5(n,0);
      device_vector<T> d_data6(n,0);

      // run on host
      reduce_by_key
          ( make_zip_iterator(make_tuple(h_data0.begin(), h_data0.begin())),
            make_zip_iterator(make_tuple(h_data0.end(),   h_data0.end())),
            make_zip_iterator(make_tuple(h_data1.begin(), h_data2.begin())),
            make_zip_iterator(make_tuple(h_data3.begin(), h_data4.begin())),
            make_zip_iterator(make_tuple(h_data5.begin(), h_data6.begin())),
            equal_to<Tuple>(),
            TuplePlus<Tuple>());

      // run on device
      reduce_by_key
          ( make_zip_iterator(make_tuple(d_data0.begin(), d_data0.begin())),
            make_zip_iterator(make_tuple(d_data0.end(),   d_data0.end())),
            make_zip_iterator(make_tuple(d_data1.begin(), d_data2.begin())),
            make_zip_iterator(make_tuple(d_data3.begin(), d_data4.begin())),
            make_zip_iterator(make_tuple(d_data5.begin(), d_data6.begin())),
            equal_to<Tuple>(),
            TuplePlus<Tuple>());

      ASSERT_EQUAL(h_data3, d_data3);
      ASSERT_EQUAL(h_data4, d_data4);
      ASSERT_EQUAL(h_data5, d_data5);
      ASSERT_EQUAL(h_data6, d_data6);
    }
  }
};
// Instantiates TestZipIteratorReduceByKey over the UnsignedIntegralTypes type
// list through the VariableUnitTest harness. Presumably constructing this
// global registers the test with the unittest driver, which then invokes it
// for several sizes n -- confirm against unittest/unittest.h.
VariableUnitTest<TestZipIteratorReduceByKey, UnsignedIntegralTypes> TestZipIteratorReduceByKeyInstance;