1 /**
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with this
4 * work for additional information regarding copyright ownership. The ASF
5 * licenses this file to you under the Apache License, Version 2.0 (the
6 * "License"); you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14 * License for the specific language governing permissions and limitations under
15 * the License.
16 */
17 package org.apache.hadoop.hbase.io.hfile;
18
19 import java.util.ArrayList;
20 import java.util.Arrays;
21 import java.util.Collections;
22 import java.util.Random;
23
24 /**
25 * A class that generates random numbers that follow some distribution.
26 * <p>
27 * Copied from
28 * <a href="https://issues.apache.org/jira/browse/HADOOP-3315">hadoop-3315 tfile</a>.
29 * Remove after tfile is committed and use the tfile version of this class
30 * instead.</p>
31 */
32 public class RandomDistribution {
33 /**
34 * Interface for discrete (integer) random distributions.
35 */
36 public interface DiscreteRNG {
37 /**
38 * Get the next random number
39 *
40 * @return the next random number.
41 */
42 int nextInt();
43 }
44
45 /**
46 * P(i)=1/(max-min)
47 */
48 public static final class Flat implements DiscreteRNG {
49 private final Random random;
50 private final int min;
51 private final int max;
52
53 /**
54 * Generate random integers from min (inclusive) to max (exclusive)
55 * following even distribution.
56 *
57 * @param random
58 * The basic random number generator.
59 * @param min
60 * Minimum integer
61 * @param max
62 * maximum integer (exclusive).
63 *
64 */
65 public Flat(Random random, int min, int max) {
66 if (min >= max) {
67 throw new IllegalArgumentException("Invalid range");
68 }
69 this.random = random;
70 this.min = min;
71 this.max = max;
72 }
73
74 /**
75 * @see DiscreteRNG#nextInt()
76 */
77 @Override
78 public int nextInt() {
79 return random.nextInt(max - min) + min;
80 }
81 }
82
83 /**
84 * Zipf distribution. The ratio of the probabilities of integer i and j is
85 * defined as follows:
86 *
87 * P(i)/P(j)=((j-min+1)/(i-min+1))^sigma.
88 */
89 public static final class Zipf implements DiscreteRNG {
90 private static final double DEFAULT_EPSILON = 0.001;
91 private final Random random;
92 private final ArrayList<Integer> k;
93 private final ArrayList<Double> v;
94
95 /**
96 * Constructor
97 *
98 * @param r
99 * The random number generator.
100 * @param min
101 * minimum integer (inclusvie)
102 * @param max
103 * maximum integer (exclusive)
104 * @param sigma
105 * parameter sigma. (sigma > 1.0)
106 */
107 public Zipf(Random r, int min, int max, double sigma) {
108 this(r, min, max, sigma, DEFAULT_EPSILON);
109 }
110
111 /**
112 * Constructor.
113 *
114 * @param r
115 * The random number generator.
116 * @param min
117 * minimum integer (inclusvie)
118 * @param max
119 * maximum integer (exclusive)
120 * @param sigma
121 * parameter sigma. (sigma > 1.0)
122 * @param epsilon
123 * Allowable error percentage (0 < epsilon < 1.0).
124 */
125 public Zipf(Random r, int min, int max, double sigma, double epsilon) {
126 if ((max <= min) || (sigma <= 1) || (epsilon <= 0)
127 || (epsilon >= 0.5)) {
128 throw new IllegalArgumentException("Invalid arguments");
129 }
130 random = r;
131 k = new ArrayList<Integer>();
132 v = new ArrayList<Double>();
133
134 double sum = 0;
135 int last = -1;
136 for (int i = min; i < max; ++i) {
137 sum += Math.exp(-sigma * Math.log(i - min + 1));
138 if ((last == -1) || i * (1 - epsilon) > last) {
139 k.add(i);
140 v.add(sum);
141 last = i;
142 }
143 }
144
145 if (last != max - 1) {
146 k.add(max - 1);
147 v.add(sum);
148 }
149
150 v.set(v.size() - 1, 1.0);
151
152 for (int i = v.size() - 2; i >= 0; --i) {
153 v.set(i, v.get(i) / sum);
154 }
155 }
156
157 /**
158 * @see DiscreteRNG#nextInt()
159 */
160 @Override
161 public int nextInt() {
162 double d = random.nextDouble();
163 int idx = Collections.binarySearch(v, d);
164
165 if (idx > 0) {
166 ++idx;
167 }
168 else {
169 idx = -(idx + 1);
170 }
171
172 if (idx >= v.size()) {
173 idx = v.size() - 1;
174 }
175
176 if (idx == 0) {
177 return k.get(0);
178 }
179
180 int ceiling = k.get(idx);
181 int lower = k.get(idx - 1);
182
183 return ceiling - random.nextInt(ceiling - lower);
184 }
185 }
186
187 /**
188 * Binomial distribution.
189 *
190 * P(k)=select(n, k)*p^k*(1-p)^(n-k) (k = 0, 1, ..., n)
191 *
192 * P(k)=select(max-min-1, k-min)*p^(k-min)*(1-p)^(k-min)*(1-p)^(max-k-1)
193 */
194 public static final class Binomial implements DiscreteRNG {
195 private final Random random;
196 private final int min;
197 private final int n;
198 private final double[] v;
199
200 private static double select(int n, int k) {
201 double ret = 1.0;
202 for (int i = k + 1; i <= n; ++i) {
203 ret *= (double) i / (i - k);
204 }
205 return ret;
206 }
207
208 private static double power(double p, int k) {
209 return Math.exp(k * Math.log(p));
210 }
211
212 /**
213 * Generate random integers from min (inclusive) to max (exclusive)
214 * following Binomial distribution.
215 *
216 * @param random
217 * The basic random number generator.
218 * @param min
219 * Minimum integer
220 * @param max
221 * maximum integer (exclusive).
222 * @param p
223 * parameter.
224 *
225 */
226 public Binomial(Random random, int min, int max, double p) {
227 if (min >= max) {
228 throw new IllegalArgumentException("Invalid range");
229 }
230 this.random = random;
231 this.min = min;
232 this.n = max - min - 1;
233 if (n > 0) {
234 v = new double[n + 1];
235 double sum = 0.0;
236 for (int i = 0; i <= n; ++i) {
237 sum += select(n, i) * power(p, i) * power(1 - p, n - i);
238 v[i] = sum;
239 }
240 for (int i = 0; i <= n; ++i) {
241 v[i] /= sum;
242 }
243 }
244 else {
245 v = null;
246 }
247 }
248
249 /**
250 * @see DiscreteRNG#nextInt()
251 */
252 @Override
253 public int nextInt() {
254 if (v == null) {
255 return min;
256 }
257 double d = random.nextDouble();
258 int idx = Arrays.binarySearch(v, d);
259 if (idx > 0) {
260 ++idx;
261 } else {
262 idx = -(idx + 1);
263 }
264
265 if (idx >= v.length) {
266 idx = v.length - 1;
267 }
268 return idx + min;
269 }
270 }
271 }