Creating an infinite iterator with a given distribution
Given java.util.Collection
, what is the simplest way to create an infinite java.util.Iterator
that returns those items that are displayed according to a given distribution ( org.apache.commons.math.distribution
)?
source to share
List<Object> l = new ArrayList<Object>(coll);
Iterator<Object> i = new Iterator<Object>() {
public boolean hasNext() { return true; }
public Object next() {
return coll.get(distribution.nextInt(0, l.size());
}
}
Your problem is how to convert the classes Distribution
in the apache library to implement the method nextInt
. I have to say that it is far from obvious to me how you can do this with an interface Distribution
.
One (slightly garbage) way I can imagine is to create a dataset EmpiricalDistribution
(in a batch random
) using the probability determined by your actual distribution and then using that emprirical dsitribution like Distribution
(above)
source to share
Solution for Gaussian distribution
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Random;
import java.util.SortedMap;
import java.util.Map.Entry;
import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.ImmutableSortedMap;
import com.google.common.collect.Lists;
import com.google.common.collect.Multimap;
import com.google.common.collect.ImmutableSortedMap.Builder;
/**
* Endless sequence with gaussian distribution.
*
* @param <T> the type of the elements
* @author Michael Locher
*/
public class GaussianSequence<T> implements Iterable<T>, Iterator<T> {
private static final int HISTOGRAMM_SAMPLES = 50000;
private static final int HISTOGRAMM_ELEMENTS = 100;
private static final int HISTOGRAMM_LENGTH = 80;
private static final double DEFAULT_CUTOFF = 4.0;
private final List<T> elements;
private final int maxIndex;
private final Random rnd;
private final double scaling;
private final double halfCount;
/**
* Creates this.
* @param rnd the source of randomness to use
* @param elements the elements to deliver
*/
public GaussianSequence(final Random rnd, final Collection<T> elements) {
this(rnd, DEFAULT_CUTOFF, elements);
}
private GaussianSequence(final Random rnd, final double tailCutOff, final Collection<T> elements) {
super();
this.rnd = rnd;
this.elements = new ArrayList<T>(elements);
if (this.elements.isEmpty()) {
throw new IllegalArgumentException("no elements provided");
}
this.maxIndex = this.elements.size() - 1;
this.halfCount = this.elements.size() / 2.0;
this.scaling = this.halfCount / tailCutOff;
}
/**
* {@inheritDoc}
*/
@Override
public Iterator<T> iterator() {
return this;
}
/**
* {@inheritDoc}
*/
@Override
public boolean hasNext() {
return true;
}
/**
* {@inheritDoc}
*/
@Override
public void remove() {
throw new UnsupportedOperationException();
}
/**
* {@inheritDoc}
*/
@Override
public T next() {
return this.elements.get(sanitizeIndex(determineNextIndex()));
}
private int determineNextIndex() {
final double z = this.rnd.nextGaussian();
return (int) (this.halfCount + (this.scaling * z));
}
private int sanitizeIndex(final int index) {
if (index < 0) {
return 0;
}
if (index > this.maxIndex) {
return this.maxIndex;
}
return index;
}
/**
* Prints a histogramm to stdout.
* @param args not used
*/
public static void main(final String[] args) {
final PrintWriter out = new PrintWriter(new OutputStreamWriter(System.out, Charset.forName("UTF-8")), true);
plotHistogramm(new Random(), out);
}
private static void plotHistogramm(final Random rnd, final PrintWriter out) {
// build elements
final Multimap<Integer, Integer> results = ArrayListMultimap.create();
final List<Integer> elements = Lists.newArrayListWithCapacity(HISTOGRAMM_ELEMENTS);
for (int i = 1; i < HISTOGRAMM_ELEMENTS; i++) {
elements.add(i);
}
// sample sequence
final Iterator<Integer> randomSeq = new GaussianSequence<Integer>(rnd, elements);
for (int j = 0; j < HISTOGRAMM_SAMPLES; j++) {
final Integer sampled = randomSeq.next();
results.put(sampled, sampled);
}
// count and sort results
final Builder<Integer, Integer> r = ImmutableSortedMap.naturalOrder();
for (final Entry<Integer, Collection<Integer>> e : results.asMap().entrySet()) {
final int count = e.getValue().size();
r.put(e.getKey(), count);
}
// plot results
final SortedMap<Integer, Integer> sortedAndCounted = r.build();
final double histogramScale = (double) HISTOGRAMM_LENGTH / Collections.max(sortedAndCounted.values());
for (final Entry<Integer, Integer> e : sortedAndCounted.entrySet()) {
out.format("%3d [%4d]", e.getKey(), e.getValue());
final StringBuilder c = new StringBuilder();
final int lineLength = (int) (histogramScale * e.getValue());
for (int i = 0; i < lineLength; i++) {
c.append('*');
}
out.println(c);
}
}
}
source to share