/build/package/package/library/include/rocrand/rocrand_xorwow.h Source File

/build/package/package/library/include/rocrand/rocrand_xorwow.h Source File#

API library: /build/package/package/library/include/rocrand/rocrand_xorwow.h Source File
rocrand_xorwow.h
1// Copyright (c) 2017-2025 Advanced Micro Devices, Inc. All rights reserved.
2//
3// Permission is hereby granted, free of charge, to any person obtaining a copy
4// of this software and associated documentation files (the "Software"), to deal
5// in the Software without restriction, including without limitation the rights
6// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7// copies of the Software, and to permit persons to whom the Software is
8// furnished to do so, subject to the following conditions:
9//
10// The above copyright notice and this permission notice shall be included in
11// all copies or substantial portions of the Software.
12//
13// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19// THE SOFTWARE.
20
21#ifndef ROCRAND_XORWOW_H_
22#define ROCRAND_XORWOW_H_
23
24#include "rocrand/rocrand_common.h"
25#include "rocrand/rocrand_xorwow_precomputed.h"
26
27#include <hip/hip_runtime.h>
28
37 #define ROCRAND_XORWOW_DEFAULT_SEED 0ULL // end of group rocranddevice
39
40namespace rocrand_device {
41namespace detail {
42
43__forceinline__ __device__ __host__ void copy_vec(unsigned int* dst, const unsigned int* src)
44{
45 for (int i = 0; i < XORWOW_N; i++)
46 {
47 dst[i] = src[i];
48 }
49}
50
51__forceinline__ __device__ __host__ void mul_mat_vec_inplace(const unsigned int* m, unsigned int* v)
52{
53 unsigned int r[XORWOW_N] = { 0 };
54 for (int ij = 0; ij < XORWOW_N * XORWOW_M; ij++)
55 {
56 const int i = ij / XORWOW_M;
57 const int j = ij % XORWOW_M;
58 const unsigned int b = (v[i] & (1U << j)) ? 0xffffffff : 0x0;
59 for (int k = 0; k < XORWOW_N; k++)
60 {
61 r[k] ^= b & m[i * XORWOW_M * XORWOW_N + j * XORWOW_N + k];
62 }
63 }
64 copy_vec(v, r);
65}
66
67} // end detail namespace
68
69class xorwow_engine
70{
71public:
72 struct xorwow_state
73 {
74 #ifndef ROCRAND_DETAIL_BM_NOT_IN_STATE
75 // The Box–Muller transform requires two inputs to convert uniformly
76 // distributed real values [0; 1] to normally distributed real values
77 // (with mean = 0, and stddev = 1). Often user wants only one
78 // normally distributed number, to save performance and random
79 // numbers the 2nd value is saved for future requests.
80 double boxmuller_double; // normally distributed double
81 float boxmuller_float; // normally distributed float
82 #endif
83
84 // Weyl sequence value
85 unsigned int d;
86
87 // Xorshift values (160 bits)
88 unsigned int x[5];
89 };
90
91 __forceinline__ __device__ __host__ xorwow_engine()
92 : xorwow_engine(ROCRAND_XORWOW_DEFAULT_SEED, 0, 0)
93 {}
94
100 __forceinline__ __device__ __host__ xorwow_engine(const unsigned long long seed,
101 const unsigned long long subsequence,
102 const unsigned long long offset)
103 {
104 m_state.x[0] = 123456789U;
105 m_state.x[1] = 362436069U;
106 m_state.x[2] = 521288629U;
107 m_state.x[3] = 88675123U;
108 m_state.x[4] = 5783321U;
109
110 m_state.d = 6615241U;
111
112 // Constants are arbitrary prime numbers
113 const unsigned int s0 = static_cast<unsigned int>(seed) ^ 0x2c7f967fU;
114 const unsigned int s1 = static_cast<unsigned int>(seed >> 32) ^ 0xa03697cbU;
115 const unsigned int t0 = 1228688033U * s0;
116 const unsigned int t1 = 2073658381U * s1;
117 m_state.x[0] += t0;
118 m_state.x[1] ^= t0;
119 m_state.x[2] += t1;
120 m_state.x[3] ^= t1;
121 m_state.x[4] += t0;
122 m_state.d += t1 + t0;
123
124 discard_subsequence(subsequence);
125 discard(offset);
126
127 #ifndef ROCRAND_DETAIL_BM_NOT_IN_STATE
128 m_state.boxmuller_float = ROCRAND_NAN_FLOAT;
129 m_state.boxmuller_double = ROCRAND_NAN_DOUBLE;
130 #endif
131 }
132
134 __forceinline__ __device__ __host__ void discard(unsigned long long offset)
135 {
136 #ifdef __HIP_DEVICE_COMPILE__
137 jump(offset, d_xorwow_jump_matrices);
138 #else
139 jump(offset, h_xorwow_jump_matrices);
140 #endif
141
142 // Apply n steps to Weyl sequence value as well
143 m_state.d += static_cast<unsigned int>(offset) * 362437;
144 }
145
148 __forceinline__ __device__ __host__ void discard_subsequence(unsigned long long subsequence)
149 {
150 // Discard n * 2^67 samples
151 #ifdef __HIP_DEVICE_COMPILE__
152 jump(subsequence, d_xorwow_sequence_jump_matrices);
153 #else
154 jump(subsequence, h_xorwow_sequence_jump_matrices);
155 #endif
156
157 // d has the same value because 2^67 is divisible by 2^32 (d is 32-bit)
158 }
159
160 __forceinline__ __device__ __host__ unsigned int operator()()
161 {
162 return next();
163 }
164
165 __forceinline__ __device__ __host__ unsigned int next()
166 {
167 const unsigned int t = m_state.x[0] ^ (m_state.x[0] >> 2);
168 m_state.x[0] = m_state.x[1];
169 m_state.x[1] = m_state.x[2];
170 m_state.x[2] = m_state.x[3];
171 m_state.x[3] = m_state.x[4];
172 m_state.x[4] = (m_state.x[4] ^ (m_state.x[4] << 4)) ^ (t ^ (t << 1));
173
174 m_state.d += 362437;
175
176 return m_state.d + m_state.x[4];
177 }
178
179protected:
180 __forceinline__ __device__ __host__ void
181 jump(unsigned long long v,
182 const unsigned int jump_matrices[XORWOW_JUMP_MATRICES][XORWOW_SIZE])
183 {
184 // x~(n + v) = (A^v mod m)x~n mod m
185 // The matrix (A^v mod m) can be precomputed for selected values of v.
186 //
187 // For XORWOW_JUMP_LOG2 = 2
188 // xorwow_jump_matrices contains precomputed matrices:
189 // A^1, A^4, A^16...
190 //
191 // For XORWOW_JUMP_LOG2 = 2 and XORWOW_SEQUENCE_JUMP_LOG2 = 67
192 // xorwow_sequence_jump_matrices contains precomputed matrices:
193 // A^(1 * 2^67), A^(4 * 2^67), A^(16 * 2^67)...
194 //
195 // Intermediate powers can be calculated as multiplication of the powers above.
196
197 unsigned int mi = 0;
198 while (v > 0)
199 {
200 const unsigned int is = static_cast<unsigned int>(v) & ((1 << XORWOW_JUMP_LOG2) - 1);
201 for (unsigned int i = 0; i < is; i++)
202 {
203 detail::mul_mat_vec_inplace(jump_matrices[mi], m_state.x);
204 }
205 mi++;
206 v >>= XORWOW_JUMP_LOG2;
207 }
208 }
209
210protected:
211 // State
212 xorwow_state m_state;
213
214 #ifndef ROCRAND_DETAIL_BM_NOT_IN_STATE
215 friend struct detail::engine_boxmuller_helper<xorwow_engine>;
216 #endif
217
218}; // xorwow_engine class
219
220} // end namespace rocrand_device
221
226
228typedef rocrand_device::xorwow_engine rocrand_state_xorwow;
230
242__forceinline__ __device__ __host__
243void rocrand_init(const unsigned long long seed,
244 const unsigned long long subsequence,
245 const unsigned long long offset,
246 rocrand_state_xorwow* state)
247{
248 *state = rocrand_state_xorwow(seed, subsequence, offset);
249}
250
263__forceinline__ __device__ __host__
264unsigned int rocrand(rocrand_state_xorwow* state)
265{
266 return state->next();
267}
268
277__forceinline__ __device__ __host__
278void skipahead(unsigned long long offset, rocrand_state_xorwow* state)
279{
280 return state->discard(offset);
281}
282
292__forceinline__ __device__ __host__
293void skipahead_subsequence(unsigned long long subsequence, rocrand_state_xorwow* state)
294{
295 return state->discard_subsequence(subsequence);
296}
297
307__forceinline__ __device__ __host__
308void skipahead_sequence(unsigned long long sequence, rocrand_state_xorwow* state)
309{
310 return state->discard_subsequence(sequence);
311}
312 // end of group rocranddevice
314
315#endif // ROCRAND_XORWOW_H_
__forceinline__ __device__ __host__ unsigned int rocrand(rocrand_state_xorwow *state)
Returns uniformly distributed random unsigned int value from [0; 2^32 - 1] range.
Definition rocrand_xorwow.h:264
__forceinline__ __device__ __host__ void skipahead_sequence(unsigned long long sequence, rocrand_state_xorwow *state)
Updates the XORWOW state to skip ahead by `sequence` sequences.
Definition rocrand_xorwow.h:308
__forceinline__ __device__ __host__ void skipahead(unsigned long long offset, rocrand_state_xorwow *state)
Updates the XORWOW state to skip ahead by `offset` elements.
Definition rocrand_xorwow.h:278
__forceinline__ __device__ __host__ void rocrand_init(const unsigned long long seed, const unsigned long long subsequence, const unsigned long long offset, rocrand_state_xorwow *state)
Initialize XORWOW state.
Definition rocrand_xorwow.h:243
__forceinline__ __device__ __host__ void skipahead_subsequence(unsigned long long subsequence, rocrand_state_xorwow *state)
Updates the XORWOW state to skip ahead by `subsequence` subsequences.
Definition rocrand_xorwow.h:293
#define ROCRAND_XORWOW_DEFAULT_SEED
Default seed for XORWOW PRNG.
Definition rocrand_xorwow.h:37