001/* ===========================================================
002 * JFreeChart : a free chart library for the Java(tm) platform
003 * ===========================================================
004 *
005 * (C) Copyright 2000-2008, by Object Refinery Limited and Contributors.
006 *
007 * Project Info:  http://www.jfree.org/jfreechart/index.html
008 *
009 * This library is free software; you can redistribute it and/or modify it
010 * under the terms of the GNU Lesser General Public License as published by
011 * the Free Software Foundation; either version 2.1 of the License, or
012 * (at your option) any later version.
013 *
014 * This library is distributed in the hope that it will be useful, but
015 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
016 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
017 * License for more details.
018 *
019 * You should have received a copy of the GNU Lesser General Public
020 * License along with this library; if not, write to the Free Software
021 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301,
022 * USA.
023 *
024 * [Java is a trademark or registered trademark of Sun Microsystems, Inc.
025 * in the United States and other countries.]
026 *
027 * ---------------
028 * Statistics.java
029 * ---------------
030 * (C) Copyright 2000-2008, by Matthew Wright and Contributors.
031 *
032 * Original Author:  Matthew Wright;
033 * Contributor(s):   David Gilbert (for Object Refinery Limited);
034 *
035 * Changes (from 08-Nov-2001)
036 * --------------------------
037 * 08-Nov-2001 : Added standard header and tidied Javadoc comments (DG);
038 *               Moved from JFreeChart to package com.jrefinery.data.* in
039 *               JCommon class library (DG);
040 * 24-Jun-2002 : Removed unnecessary local variable (DG);
041 * 07-Oct-2002 : Fixed errors reported by Checkstyle (DG);
042 * 26-May-2004 : Moved calculateMean() method from BoxAndWhiskerCalculator (DG);
043 * 02-Jun-2004 : Fixed bug in calculateMedian() method (DG);
044 * 11-Jan-2005 : Removed deprecated code in preparation for the 1.0.0
045 *               release (DG);
046 *
047 */
048
049package org.jfree.data.statistics;
050
051import java.util.ArrayList;
052import java.util.Collection;
053import java.util.Collections;
054import java.util.Iterator;
055import java.util.List;
056
/**
 * A utility class that provides some common statistical functions (mean,
 * median, standard deviation, least-squares line fitting, correlation and
 * moving average).  All methods are static and keep no state.
 */
public abstract class Statistics {

    /**
     * Returns the mean of an array of numbers.  This is equivalent to calling
     * <code>calculateMean(values, true)</code>.
     *
     * @param values  the values (<code>null</code> not permitted).
     *
     * @return The mean (<code>Double.NaN</code> if the array is empty or
     *     contains a <code>null</code> or <code>Double.NaN</code> item).
     *
     * @throws IllegalArgumentException if <code>values</code> is
     *     <code>null</code>.
     */
    public static double calculateMean(Number[] values) {
        return calculateMean(values, true);
    }

    /**
     * Returns the mean of an array of numbers.
     *
     * @param values  the values (<code>null</code> not permitted).
     * @param includeNullAndNaN  a flag that controls whether or not
     *     <code>null</code> and <code>Double.NaN</code> values are included
     *     in the calculation (if either is present in the array, the result is
     *     {@link Double#NaN}).
     *
     * @return The mean, or <code>Double.NaN</code> if no items were counted.
     *
     * @throws IllegalArgumentException if <code>values</code> is
     *     <code>null</code>.
     *
     * @since 1.0.3
     */
    public static double calculateMean(Number[] values,
            boolean includeNullAndNaN) {

        if (values == null) {
            throw new IllegalArgumentException("Null 'values' argument.");
        }
        double sum = 0.0;
        int count = 0;
        for (int i = 0; i < values.length; i++) {
            // treat nulls the same as NaNs
            Number item = values[i];
            double current = (item != null) ? item.doubleValue() : Double.NaN;
            if (includeNullAndNaN || !Double.isNaN(current)) {
                sum = sum + current;
                count++;
            }
        }
        // 0.0 / 0 evaluates to NaN, which covers the "nothing counted" case
        return sum / count;
    }

    /**
     * Returns the mean of a collection of <code>Number</code> objects.  This
     * is equivalent to calling <code>calculateMean(values, true)</code>.
     *
     * @param values  the values (<code>null</code> not permitted).
     *
     * @return The mean.
     *
     * @throws IllegalArgumentException if <code>values</code> is
     *     <code>null</code>.
     */
    public static double calculateMean(Collection values) {
        return calculateMean(values, true);
    }

    /**
     * Returns the mean of a collection of <code>Number</code> objects.  Items
     * that are neither <code>null</code> nor instances of <code>Number</code>
     * are ignored.
     *
     * @param values  the values (<code>null</code> not permitted).
     * @param includeNullAndNaN  a flag that controls whether or not
     *     <code>null</code> and <code>Double.NaN</code> values are included
     *     in the calculation (if either is present in the collection, the
     *     result is {@link Double#NaN}).
     *
     * @return The mean, or <code>Double.NaN</code> if no items were counted.
     *
     * @throws IllegalArgumentException if <code>values</code> is
     *     <code>null</code>.
     *
     * @since 1.0.3
     */
    public static double calculateMean(Collection values,
            boolean includeNullAndNaN) {

        if (values == null) {
            throw new IllegalArgumentException("Null 'values' argument.");
        }
        int count = 0;
        double total = 0.0;
        Iterator iterator = values.iterator();
        while (iterator.hasNext()) {
            Object item = iterator.next();
            if (item == null) {
                if (includeNullAndNaN) {
                    return Double.NaN;
                }
                continue;
            }
            if (!(item instanceof Number)) {
                continue;  // non-numeric items do not contribute
            }
            double value = ((Number) item).doubleValue();
            if (Double.isNaN(value)) {
                if (includeNullAndNaN) {
                    return Double.NaN;
                }
                continue;
            }
            total = total + value;
            count = count + 1;
        }
        // 0.0 / 0 evaluates to NaN, which covers the "nothing counted" case
        return total / count;
    }

    /**
     * Calculates the median for a list of values (<code>Number</code> objects).
     * The list of values will be copied, and the copy sorted, before
     * calculating the median.  To avoid this step (if your list of values
     * is already sorted), use the {@link #calculateMedian(List, boolean)}
     * method.
     *
     * @param values  the values (<code>null</code> permitted).
     *
     * @return The median, or <code>Double.NaN</code> if the list is
     *     <code>null</code> or empty.
     */
    public static double calculateMedian(List values) {
        return calculateMedian(values, true);
    }

    /**
     * Calculates the median for a list of values (<code>Number</code> objects).
     * If <code>copyAndSort</code> is <code>false</code>, the list is assumed
     * to be presorted in ascending order by value.  The list passed in is
     * never modified.
     *
     * @param values  the values (<code>null</code> permitted).
     * @param copyAndSort  a flag that controls whether the list of values is
     *                     copied and sorted.
     *
     * @return The median, or <code>Double.NaN</code> if the list is
     *     <code>null</code> or empty.
     */
    public static double calculateMedian(List values, boolean copyAndSort) {
        if (values == null) {
            return Double.NaN;
        }
        List sorted = values;
        if (copyAndSort) {
            // bulk copy avoids indexed access, which is slow on linked lists
            sorted = new ArrayList(values);
            Collections.sort(sorted);
        }
        return medianOfSortedRange(sorted, 0, sorted.size());
    }

    /**
     * Calculates the median for a sublist within a list of values
     * (<code>Number</code> objects).  The items in the range are copied and
     * the copy is sorted before the median is calculated.
     *
     * @param values  the values, in any order (<code>null</code> not
     *                permitted).
     * @param start  the start index (inclusive).
     * @param end  the end index (inclusive).
     *
     * @return The median, or <code>Double.NaN</code> if the range is empty.
     *
     * @throws IllegalArgumentException if <code>values</code> is
     *     <code>null</code>.
     */
    public static double calculateMedian(List values, int start, int end) {
        return calculateMedian(values, start, end, true);
    }

    /**
     * Calculates the median for a sublist within a list of values
     * (<code>Number</code> objects).  If <code>copyAndSort</code> is
     * <code>true</code>, the items in the range are copied and the copy is
     * sorted (the original list is left untouched); otherwise the items in
     * the range are assumed to be in ascending order already.
     *
     * @param values  the values (<code>null</code> not permitted).
     * @param start  the start index (inclusive).
     * @param end  the end index (inclusive).
     * @param copyAndSort  a flag that controls whether the items in the range
     *                     are copied and sorted.
     *
     * @return The median, or <code>Double.NaN</code> if the range is empty
     *     (that is, <code>end</code> is less than <code>start</code>).
     *
     * @throws IllegalArgumentException if <code>values</code> is
     *     <code>null</code>.
     */
    public static double calculateMedian(List values, int start, int end,
                                         boolean copyAndSort) {

        if (values == null) {
            throw new IllegalArgumentException("Null 'values' argument.");
        }
        int count = end - start + 1;
        if (count <= 0) {
            // same answer for both modes, rather than failing on a negative
            // list capacity in the copy-and-sort case
            return Double.NaN;
        }
        if (!copyAndSort) {
            return medianOfSortedRange(values, start, count);
        }
        List working = new ArrayList(count);
        for (int i = start; i <= end; i++) {
            working.add(values.get(i));
        }
        Collections.sort(working);
        return medianOfSortedRange(working, 0, count);

    }

    /**
     * Returns the median of <code>count</code> consecutive items, beginning
     * at index <code>start</code>, of a list that is already sorted in
     * ascending order.
     *
     * @param values  the sorted values (<code>Number</code> objects).
     * @param start  the index of the first item in the range.
     * @param count  the number of items in the range.
     *
     * @return The median, or <code>Double.NaN</code> if <code>count</code>
     *     is not positive.
     */
    private static double medianOfSortedRange(List values, int start,
                                              int count) {
        if (count <= 0) {
            return Double.NaN;
        }
        int middle = start + count / 2;
        double upper = ((Number) values.get(middle)).doubleValue();
        if (count % 2 == 1) {
            // odd item count: the single middle item is the median
            return upper;
        }
        // even item count: average the two items either side of the middle
        double lower = ((Number) values.get(middle - 1)).doubleValue();
        return (lower + upper) / 2.0;
    }

    /**
     * Returns the standard deviation of a set of numbers, calculated with
     * the <code>n - 1</code> divisor.  For an array containing a single item
     * the divisor is zero and the result is <code>Double.NaN</code>.
     *
     * @param data  the data (<code>null</code> or zero length array not
     *     permitted).
     *
     * @return The standard deviation of a set of numbers.
     *
     * @throws IllegalArgumentException if <code>data</code> is
     *     <code>null</code> or has zero length.
     * @throws NullPointerException if <code>data</code> contains a
     *     <code>null</code> item.
     */
    public static double getStdDev(Number[] data) {
        if (data == null) {
            throw new IllegalArgumentException("Null 'data' array.");
        }
        if (data.length == 0) {
            throw new IllegalArgumentException("Zero length 'data' array.");
        }
        double mean = calculateMean(data);
        double sumOfSquares = 0.0;
        for (int i = 0; i < data.length; i++) {
            double diff = data[i].doubleValue() - mean;
            sumOfSquares = sumOfSquares + diff * diff;
        }
        return Math.sqrt(sumOfSquares / (data.length - 1));
    }

    /**
     * Fits a straight line to a set of (x, y) data, returning the slope and
     * intercept.
     *
     * @param xData  the x-data (<code>null</code> not permitted).
     * @param yData  the y-data (<code>null</code> not permitted).
     *
     * @return A double array with the intercept in [0] and the slope in [1].
     *
     * @throws IllegalArgumentException if either array is <code>null</code>
     *     or the arrays have different lengths.
     */
    public static double[] getLinearFit(Number[] xData, Number[] yData) {

        if (xData == null) {
            throw new IllegalArgumentException("Null 'xData' argument.");
        }
        if (yData == null) {
            throw new IllegalArgumentException("Null 'yData' argument.");
        }
        if (xData.length != yData.length) {
            throw new IllegalArgumentException(
                "Statistics.getLinearFit(): array lengths must be equal.");
        }

        double slope = getSlope(xData, yData);
        // the fitted line passes through (xbar, ybar)
        double intercept = calculateMean(yData) - slope * calculateMean(xData);
        return new double[] {intercept, slope};

    }

    /**
     * Finds the slope of a regression line using least squares.
     *
     * @param xData  the x-values (<code>null</code> not permitted).
     * @param yData  the y-values (<code>null</code> not permitted).
     *
     * @return The slope.
     *
     * @throws IllegalArgumentException if either array is <code>null</code>
     *     or the arrays have different lengths.
     * @throws NullPointerException if either array contains a
     *     <code>null</code> item.
     */
    public static double getSlope(Number[] xData, Number[] yData) {

        if (xData == null) {
            throw new IllegalArgumentException("Null 'xData' argument.");
        }
        if (yData == null) {
            throw new IllegalArgumentException("Null 'yData' argument.");
        }
        if (xData.length != yData.length) {
            throw new IllegalArgumentException("Array lengths must be equal.");
        }

        // ********* stat function for linear slope ********
        // y = a + bx
        // a = ybar - b * xbar
        //     sum(x * y) - (sum(x) * sum(y)) / n
        // b = -----------------------------------
        //     sum(x^2) - (sum(x))^2 / n
        // *************************************************

        // sum of x, x^2, x * y, y
        double sx = 0.0, sxx = 0.0, sxy = 0.0, sy = 0.0;
        int n = xData.length;
        for (int i = 0; i < n; i++) {
            double x = xData[i].doubleValue();
            double y = yData[i].doubleValue();
            sx = sx + x;
            sxx = sxx + x * x;
            sxy = sxy + y * x;
            sy = sy + y;
        }
        return (sxy - (sx * sy) / n) / (sxx - (sx * sx) / n);

    }

    /**
     * Calculates the correlation between two datasets.  Both arrays should
     * contain the same number of items.  Null values are treated as zero.
     * <P>
     * Information about the correlation calculation was obtained from:
     *
     * http://trochim.human.cornell.edu/kb/statcorr.htm
     *
     * @param data1  the first dataset (<code>null</code> not permitted).
     * @param data2  the second dataset (<code>null</code> not permitted).
     *
     * @return The correlation.
     *
     * @throws IllegalArgumentException if either array is <code>null</code>
     *     or the arrays have different lengths.
     */
    public static double getCorrelation(Number[] data1, Number[] data2) {
        if (data1 == null) {
            throw new IllegalArgumentException("Null 'data1' argument.");
        }
        if (data2 == null) {
            throw new IllegalArgumentException("Null 'data2' argument.");
        }
        if (data1.length != data2.length) {
            throw new IllegalArgumentException(
                "'data1' and 'data2' arrays must have same length."
            );
        }
        int n = data1.length;
        double sumX = 0.0;
        double sumY = 0.0;
        double sumX2 = 0.0;
        double sumY2 = 0.0;
        double sumXY = 0.0;
        for (int i = 0; i < n; i++) {
            // a missing item contributes zero to every sum
            double x = (data1[i] != null) ? data1[i].doubleValue() : 0.0;
            double y = (data2[i] != null) ? data2[i].doubleValue() : 0.0;
            sumX = sumX + x;
            sumY = sumY + y;
            sumXY = sumXY + (x * y);
            sumX2 = sumX2 + (x * x);
            sumY2 = sumY2 + (y * y);
        }
        return (n * sumXY - sumX * sumY) / Math.pow((n * sumX2 - sumX * sumX)
                * (n * sumY2 - sumY * sumY), 0.5);
    }

    /**
     * Returns a data set for a moving average on the data set passed in.
     * Row <code>i</code> of the result holds the x-value
     * <code>xData[i + period]</code> together with the mean of the
     * <code>period</code> y-values starting at <code>yData[i]</code>.
     *
     * @param xData  an array of the x data (<code>null</code> not permitted).
     * @param yData  an array of the y data (<code>null</code> not permitted).
     * @param period  the number of data points to average (must be at least
     *     1 and no greater than the length of the data arrays).
     *
     * @return A double[][] with <code>xData.length - period</code> rows in
     *         the first dimension, and two doubles for x and y in the second
     *         dimension.
     *
     * @throws IllegalArgumentException if either array is <code>null</code>,
     *     the arrays have different lengths, or <code>period</code> is out of
     *     range.
     */
    public static double[][] getMovingAverage(Number[] xData,
                                              Number[] yData,
                                              int period) {

        // check arguments...
        if (xData == null) {
            throw new IllegalArgumentException("Null 'xData' argument.");
        }
        if (yData == null) {
            throw new IllegalArgumentException("Null 'yData' argument.");
        }
        if (xData.length != yData.length) {
            throw new IllegalArgumentException("Array lengths must be equal.");
        }
        if (period < 1) {
            // a zero period would divide by zero and a negative one would
            // index outside the arrays
            throw new IllegalArgumentException(
                "Period must be at least 1."
            );
        }
        if (period > xData.length) {
            throw new IllegalArgumentException(
                "Period can't be longer than dataset."
            );
        }

        double[][] result = new double[xData.length - period][2];
        for (int i = 0; i < result.length; i++) {
            result[i][0] = xData[i + period].doubleValue();
            // holds the moving average sum
            double sum = 0.0;
            for (int j = 0; j < period; j++) {
                sum += yData[i + j].doubleValue();
            }
            result[i][1] = sum / period;
        }
        return result;

    }

}