/*-
 *******************************************************************************
 * Copyright (c) 2011, 2016 Diamond Light Source Ltd.
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/epl-v10.html
 *
 * Contributors:
 *    Peter Chang - initial API and implementation and/or initial documentation
 *******************************************************************************/

package org.eclipse.january.dataset;

import java.lang.ref.SoftReference;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;

import org.apache.commons.math3.complex.Complex;
import org.apache.commons.math3.stat.descriptive.moment.Kurtosis;
import org.apache.commons.math3.stat.descriptive.moment.Skewness;
import org.eclipse.january.metadata.Dirtiable;
import org.eclipse.january.metadata.MetadataType;


/**
 * Statistics class
 * 
 * TODO Where is mode? http://en.wikipedia.org/wiki/Mode_(statistics)
 */
public class Stats {

	/**
	 * Soft reference wrapper for a dataset so cached intermediate results (e.g.
	 * sorted copies) can be reclaimed by the garbage collector under memory
	 * pressure; {@code get()} may therefore return null.
	 */
	private static class ReferencedDataset extends SoftReference<Dataset> {
		public ReferencedDataset(Dataset d) {
			super(d);
		}
	}

	/**
	 * Quantile statistics cached on a dataset as metadata.
	 * <p>
	 * T is Double for elemental datasets and double[] (one value per element)
	 * for compound datasets. The {@code isDirty} flag is reset externally via
	 * the {@link Dirtiable} annotation when the host dataset changes.
	 */
	private static class QStatisticsImpl<T> implements MetadataType {
		private static final long serialVersionUID = -3296671666463190388L;
		final static Double Q1 = 0.25;
		final static Double Q2 = 0.5;
		final static Double Q3 = 0.75;
		// quantile values over the whole (flattened) dataset, keyed by quantile fraction q
		Map<Double, T> qmap = new HashMap<Double, T>();
		// per-axis quantile datasets, keyed by axis then by q (softly referenced)
		transient Map<Integer, Map<Double, ReferencedDataset>> aqmap = new HashMap<Integer, Map<Double, ReferencedDataset>>();
		transient ReferencedDataset s; // store 0th element
		// per-axis sorted copies of the dataset, for computing further quantiles
		transient Map<Integer, ReferencedDataset> smap = new HashMap<>();

		@Dirtiable
		private boolean isDirty = true;

		@Override
		public QStatisticsImpl<T> clone() {
			return new QStatisticsImpl<T>(this);
		}

		public QStatisticsImpl() {
		}

		// copy constructor: takes a view of the sorted store and shallow-copies the cache maps
		private QStatisticsImpl(QStatisticsImpl<T> qstats) {
			if (qstats.s != null && qstats.s.get() != null) {
				s = new ReferencedDataset(qstats.s.get().getView(false));
			}
			qmap.putAll(qstats.qmap);
			for (Integer i : qstats.aqmap.keySet()) {
				aqmap.put(i, new HashMap<>(qstats.aqmap.get(i)));
			}
			smap.putAll(qstats.smap);
			isDirty = qstats.isDirty;
		}

		public void setQuantile(double q, T v) {
			qmap.put(q, v);
		}

		public T getQuantile(double q) {
			return qmap.get(q);
		}

		// get (or lazily create) the quantile cache for an axis
		private Map<Double, ReferencedDataset> getMap(int axis) {
			Map<Double, ReferencedDataset> qm = aqmap.get(axis);
			if (qm == null) {
				qm = new HashMap<>();
				aqmap.put(axis, qm);
			}
			return qm;
		}

		public void setQuantile(int axis, double q, Dataset v) {
			Map<Double, ReferencedDataset> qm = getMap(axis);
			qm.put(q, new ReferencedDataset(v));
		}

		public Dataset getQuantile(int axis, double q) {
			Map<Double, ReferencedDataset> qm = getMap(axis);
			ReferencedDataset rd = qm.get(q);
			return rd == null ? null : rd.get(); // may be null if soft reference was cleared
		}

		Dataset getSortedDataset(int axis) {
			return smap.containsKey(axis) ? smap.get(axis).get() : null;
		}

		void setSortedDataset(int axis, Dataset v) {
			smap.put(axis, new ReferencedDataset(v));
		}
	}

	// calculates statistics and returns sorted dataset (0th element if compound)
	private static QStatisticsImpl<?> calcQuartileStats(final Dataset a) {
		Dataset s = null;
		final int is = a.getElementsPerItem();

		if (is == 1) {
			s = DatasetUtils.sort(a);

			QStatisticsImpl<Double> qstats = new QStatisticsImpl<Double>();

			qstats.setQuantile(QStatisticsImpl.Q1, pQuantile(s, QStatisticsImpl.Q1));
			qstats.setQuantile(QStatisticsImpl.Q2, pQuantile(s, QStatisticsImpl.Q2));
			qstats.setQuantile(QStatisticsImpl.Q3, pQuantile(s, QStatisticsImpl.Q3));
			qstats.s = new ReferencedDataset(s);
			return qstats;
		}

		// compound dataset: compute quartiles element-by-element using a
		// single-element workspace of the same class as a
		QStatisticsImpl<double[]> qstats = new QStatisticsImpl<double[]>();

		Dataset w = DatasetFactory.zeros(1, a.getClass(), a.getShapeRef());
		double[] q1 = new double[is];
		double[] q2 = new double[is];
		double[] q3 = new double[is];
		// arrays are stored first then filled below - same references, so this is safe
		qstats.setQuantile(QStatisticsImpl.Q1, q1);
		qstats.setQuantile(QStatisticsImpl.Q2, q2);
		qstats.setQuantile(QStatisticsImpl.Q3, q3);
		for (int j = 0; j < is; j++) {
			((CompoundDataset) a).copyElements(w, j);
			w.sort(null);

			q1[j] = pQuantile(w, QStatisticsImpl.Q1);
			q2[j] = pQuantile(w, QStatisticsImpl.Q2);
			q3[j] = pQuantile(w, QStatisticsImpl.Q3);
			if (j == 0)
				s = w.clone(); // keep sorted 0th element for later quantile requests
		}
		qstats.s = new ReferencedDataset(s);

		return qstats;
	}

	// Return cached quartile statistics, recalculating when absent or dirty
	static private QStatisticsImpl<?> getQStatistics(final Dataset a) {
		QStatisticsImpl<?> m = a.getFirstMetadata(QStatisticsImpl.class);
		if (m == null || m.isDirty) {
			m = calcQuartileStats(a);
			a.setMetadata(m);
		}
		return m;
	}

static private QStatisticsImpl<?> getQStatistics(final Dataset a, final int axis) { 165 final int is = a.getElementsPerItem(); 166 QStatisticsImpl<?> qstats = a.getFirstMetadata(QStatisticsImpl.class); 167 168 if (qstats == null || qstats.isDirty) { 169 if (is == 1) { 170 qstats = new QStatisticsImpl<Double>(); 171 } else { 172 qstats = new QStatisticsImpl<double[]>(); 173 } 174 a.setMetadata(qstats); 175 } 176 177 if (qstats.getQuantile(axis, QStatisticsImpl.Q2) == null) { 178 if (is == 1) { 179 Dataset s = DatasetUtils.sort(a, axis); 180 181 qstats.setQuantile(axis, QStatisticsImpl.Q1, pQuantile(s, axis, QStatisticsImpl.Q1)); 182 qstats.setQuantile(axis, QStatisticsImpl.Q2, pQuantile(s, axis, QStatisticsImpl.Q2)); 183 qstats.setQuantile(axis, QStatisticsImpl.Q3, pQuantile(s, axis, QStatisticsImpl.Q3)); 184 qstats.setSortedDataset(axis, s); 185 } else { 186 Dataset w = DatasetFactory.zeros(1, a.getClass(), a.getShapeRef()); 187 CompoundDoubleDataset q1 = null, q2 = null, q3 = null; 188 for (int j = 0; j < is; j++) { 189 ((CompoundDataset) a).copyElements(w, j); 190 w.sort(axis); 191 192 final Dataset c = pQuantile(w, axis, QStatisticsImpl.Q1); 193 if (j == 0) { 194 q1 = DatasetFactory.zeros(is, CompoundDoubleDataset.class, c.getShapeRef()); 195 q2 = DatasetFactory.zeros(is, CompoundDoubleDataset.class, c.getShapeRef()); 196 q3 = DatasetFactory.zeros(is, CompoundDoubleDataset.class, c.getShapeRef()); 197 } 198 q1.setElements(c, j); 199 200 q2.setElements(pQuantile(w, axis, QStatisticsImpl.Q2), j); 201 202 q3.setElements(pQuantile(w, axis, QStatisticsImpl.Q3), j); 203 } 204 qstats.setQuantile(axis, QStatisticsImpl.Q1, q1); 205 qstats.setQuantile(axis, QStatisticsImpl.Q2, q2); 206 qstats.setQuantile(axis, QStatisticsImpl.Q3, q3); 207 } 208 } 209 210 return qstats; 211 } 212 213 // process a sorted dataset 214 private static double pQuantile(final Dataset s, final double q) { 215 double f = (s.getSize() - 1) * q; // fraction of sample number 216 if (f < 0) 217 return 
Double.NaN; 218 int qpt = (int) Math.floor(f); // quantile point 219 f -= qpt; 220 221 double quantile = s.getElementDoubleAbs(qpt); 222 if (f > 0) { 223 quantile = (1-f)*quantile + f*s.getElementDoubleAbs(qpt+1); 224 } 225 return quantile; 226 } 227 228 // process a sorted dataset and returns a double or compound double dataset 229 private static Dataset pQuantile(final Dataset s, final int axis, final double q) { 230 final int rank = s.getRank(); 231 final int is = s.getElementsPerItem(); 232 233 int[] oshape = s.getShape(); 234 235 double f = (oshape[axis] - 1) * q; // fraction of sample number 236 int qpt = (int) Math.floor(f); // quantile point 237 f -= qpt; 238 239 oshape[axis] = 1; 240 int[] qshape = ShapeUtils.squeezeShape(oshape, false); 241 Dataset qds = DatasetFactory.zeros(is, CompoundDoubleDataset.class, qshape); 242 243 IndexIterator qiter = qds.getIterator(true); 244 int[] qpos = qiter.getPos(); 245 int[] spos = oshape; 246 247 while (qiter.hasNext()) { 248 int i = 0; 249 for (; i < axis; i++) { 250 spos[i] = qpos[i]; 251 } 252 spos[i++] = qpt; 253 for (; i < rank; i++) { 254 spos[i] = qpos[i-1]; 255 } 256 257 Object obj = s.getObject(spos); 258 qds.set(obj, qpos); 259 } 260 261 if (f > 0) { 262 qiter = qds.getIterator(true); 263 qpos = qiter.getPos(); 264 qpt++; 265 Dataset rds = DatasetFactory.zeros(is, CompoundDoubleDataset.class, qshape); 266 267 while (qiter.hasNext()) { 268 int i = 0; 269 for (; i < axis; i++) { 270 spos[i] = qpos[i]; 271 } 272 spos[i++] = qpt; 273 for (; i < rank; i++) { 274 spos[i] = qpos[i-1]; 275 } 276 277 Object obj = s.getObject(spos); 278 rds.set(obj, qpos); 279 } 280 rds.imultiply(f); 281 qds.imultiply(1-f); 282 qds.iadd(rds); 283 } 284 285 return qds; 286 } 287 288 /** 289 * Calculate quantile of dataset which is defined as the inverse of the cumulative distribution function (CDF) 290 * @param a 291 * @param q 292 * @return point at which CDF has value q 293 */ 294 @SuppressWarnings("unchecked") 295 public static 
double quantile(final Dataset a, final double q) { 296 if (q < 0 || q > 1) { 297 throw new IllegalArgumentException("Quantile requested is outside [0,1]"); 298 } 299 QStatisticsImpl<Double> qs = (QStatisticsImpl<Double>) getQStatistics(a); 300 Double qv = qs.getQuantile(q); 301 if (qv == null) { 302 qv = pQuantile(qs.s.get(), q); 303 qs.setQuantile(q, qv); 304 } 305 return qv; 306 } 307 308 /** 309 * Calculate quantiles of dataset which is defined as the inverse of the cumulative distribution function (CDF) 310 * @param a 311 * @param values 312 * @return points at which CDF has given values 313 */ 314 @SuppressWarnings("unchecked") 315 public static double[] quantile(final Dataset a, final double... values) { 316 final double[] points = new double[values.length]; 317 QStatisticsImpl<Double> qs = (QStatisticsImpl<Double>) getQStatistics(a); 318 for (int i = 0; i < points.length; i++) { 319 final double q = values[i]; 320 if (q < 0 || q > 1) { 321 throw new IllegalArgumentException("Quantile requested is outside [0,1]"); 322 } 323 Double qv = qs.getQuantile(q); 324 if (qv == null) { 325 qv = pQuantile(qs.s.get(), q); 326 qs.setQuantile(q, qv); 327 } 328 points[i] = qv; 329 } 330 331 return points; 332 } 333 334 /** 335 * Calculate quantiles of dataset which is defined as the inverse of the cumulative distribution function (CDF) 336 * @param a 337 * @param axis 338 * @param values 339 * @return points at which CDF has given values 340 */ 341 @SuppressWarnings({ "unchecked" }) 342 public static Dataset[] quantile(final Dataset a, int axis, final double... 
values) { 343 final Dataset[] points = new Dataset[values.length]; 344 final int is = a.getElementsPerItem(); 345 axis = a.checkAxis(axis); 346 347 if (is == 1) { 348 QStatisticsImpl<Double> qs = (QStatisticsImpl<Double>) getQStatistics(a, axis); 349 for (int i = 0; i < points.length; i++) { 350 final double q = values[i]; 351 if (q < 0 || q > 1) { 352 throw new IllegalArgumentException("Quantile requested is outside [0,1]"); 353 } 354 Dataset qv = qs.getQuantile(axis, q); 355 if (qv == null) { 356 qv = pQuantile(qs.getSortedDataset(axis), axis, q); 357 qs.setQuantile(axis, q, qv); 358 } 359 points[i] = qv; 360 } 361 } else { 362 QStatisticsImpl<double[]> qs = (QStatisticsImpl<double[]>) getQStatistics(a); 363 Dataset w = DatasetFactory.zeros(1, a.getClass(), a.getShapeRef()); 364 for (int j = 0; j < is; j++) { 365 boolean copied = false; 366 367 for (int i = 0; i < points.length; i++) { 368 final double q = values[i]; 369 if (q < 0 || q > 1) { 370 throw new IllegalArgumentException("Quantile requested is outside [0,1]"); 371 } 372 Dataset qv = qs.getQuantile(axis, q); 373 if (qv == null) { 374 if (!copied) { 375 copied = true; 376 ((CompoundDataset) a).copyElements(w, j); 377 w.sort(axis); 378 } 379 qv = pQuantile(w, axis, q); 380 qs.setQuantile(axis, q, qv); 381 if (j == 0) { 382 points[i] = DatasetFactory.zeros(is, qv.getClass(), qv.getShapeRef()); 383 } 384 ((CompoundDoubleDataset) points[i]).setElements(qv, j); 385 } 386 } 387 } 388 } 389 390 return points; 391 } 392 393 /** 394 * @param a dataset 395 * @param axis 396 * @return median 397 */ 398 public static Dataset median(final Dataset a, int axis) { 399 axis = a.checkAxis(axis); 400 return getQStatistics(a, axis).getQuantile(axis, QStatisticsImpl.Q2); 401 } 402 403 /** 404 * @param a dataset 405 * @return median 406 */ 407 public static Object median(final Dataset a) { 408 return getQStatistics(a).getQuantile(QStatisticsImpl.Q2); 409 } 410 411 /** 412 * Interquartile range: Q3 - Q1 413 * @param a 414 * 
@return range
	 */
	@SuppressWarnings("unchecked")
	public static Object iqr(final Dataset a) {
		final int is = a.getElementsPerItem();
		if (is == 1) {
			QStatisticsImpl<Double> qs = (QStatisticsImpl<Double>) getQStatistics(a);
			return qs.getQuantile(QStatisticsImpl.Q3) - qs.getQuantile(QStatisticsImpl.Q1);
		}

		// compound dataset: per-element interquartile range
		QStatisticsImpl<double[]> qs = (QStatisticsImpl<double[]>) getQStatistics(a);
		double[] q1 = (double[]) qs.getQuantile(QStatisticsImpl.Q1);
		double[] q3 = ((double[]) qs.getQuantile(QStatisticsImpl.Q3)).clone(); // clone so the cached Q3 array is not overwritten
		for (int j = 0; j < is; j++) {
			q3[j] -= q1[j];
		}
		return q3;
	}

	/**
	 * Interquartile range: Q3 - Q1
	 * @param a
	 * @param axis
	 * @return range
	 */
	public static Dataset iqr(final Dataset a, int axis) {
		axis = a.checkAxis(axis);
		QStatisticsImpl<?> qs = getQStatistics(a, axis);
		Dataset q3 = qs.getQuantile(axis, QStatisticsImpl.Q3);

		return Maths.subtract(q3, qs.getQuantile(axis, QStatisticsImpl.Q1));
	}

	// Resolve (ignoreNaNs, ignoreInfs) from the varargs convention (second flag
	// defaults to the first) and return cached higher-moment statistics,
	// recalculating when absent or dirty.
	static private HigherStatisticsImpl<?> getHigherStatistic(final Dataset a, boolean[] ignoreInvalids) {
		boolean ignoreNaNs = false;
		boolean ignoreInfs = false;
		if (a.hasFloatingPointElements()) {
			ignoreNaNs = ignoreInvalids != null && ignoreInvalids.length > 0 ? ignoreInvalids[0] : false;
			ignoreInfs = ignoreInvalids != null && ignoreInvalids.length > 1 ? ignoreInvalids[1] : ignoreNaNs;
		}

		HigherStatisticsImpl<?> stats = a.getFirstMetadata(HigherStatisticsImpl.class);
		if (stats == null || stats.isDirty) {
			stats = calculateHigherMoments(a, ignoreNaNs, ignoreInfs);
			a.setMetadata(stats);
		}

		return stats;
	}

	// As above but for statistics along an axis; also recalculates when the
	// requested axis has not been computed yet.
	static private HigherStatisticsImpl<?> getHigherStatistic(final Dataset a, final boolean[] ignoreInvalids, final int axis) {
		boolean ignoreNaNs = false;
		boolean ignoreInfs = false;
		if (a.hasFloatingPointElements()) {
			ignoreNaNs = ignoreInvalids != null && ignoreInvalids.length > 0 ? ignoreInvalids[0] : false;
			ignoreInfs = ignoreInvalids != null && ignoreInvalids.length > 1 ? ignoreInvalids[1] : ignoreNaNs;
		}

		HigherStatisticsImpl<?> stats = a.getFirstMetadata(HigherStatisticsImpl.class);
		if (stats == null || stats.getSkewness(axis) == null || stats.isDirty) {
			stats = calculateHigherMoments(a, ignoreNaNs, ignoreInfs, axis);
			a.setMetadata(stats);
		}

		return stats;
	}

	/**
	 * Higher-moment (skewness and kurtosis) statistics cached on a dataset as
	 * metadata. T is Double for elemental datasets and double[] for compound
	 * datasets; per-axis results are held as softly-referenced datasets.
	 */
	private static class HigherStatisticsImpl<T> implements MetadataType {
		private static final long serialVersionUID = -6587974784104116792L;
		T skewness; // whole-dataset skewness
		T kurtosis; // whole-dataset kurtosis
		transient Map<Integer, ReferencedDataset> smap = new HashMap<>(); // per-axis skewness
		transient Map<Integer, ReferencedDataset> kmap = new HashMap<>(); // per-axis kurtosis

		@Dirtiable
		private boolean isDirty = true;

		@Override
		public HigherStatisticsImpl<T> clone() {
			return new HigherStatisticsImpl<>(this);
		}

		public HigherStatisticsImpl() {
		}

		// copy constructor used by clone(); maps are shallow-copied
		private HigherStatisticsImpl(HigherStatisticsImpl<T> hstats) {
			skewness = hstats.skewness;
			kurtosis = hstats.kurtosis;
			smap.putAll(hstats.smap);
			kmap.putAll(hstats.kmap);
			isDirty = hstats.isDirty;
		}

//		public void setSkewness(T skewness) {
//			this.skewness = skewness;
//		}
//
//		public void setKurtosis(T kurtosis) {
//			this.kurtosis = kurtosis;
//		}
//
//		public T getSkewness() {
//			return skewness;
//		}
//
//		public T getKurtosis() {
//			return kurtosis;
//		}

		public Dataset getSkewness(int axis) {
			ReferencedDataset rd = smap.get(axis);
			return rd == null ? null : rd.get(); // may be null if soft reference was cleared
		}

		public Dataset getKurtosis(int axis) {
			ReferencedDataset rd = kmap.get(axis);
			return rd == null ? null : rd.get();
		}

		public void setSkewness(int axis, Dataset s) {
			smap.put(axis, new ReferencedDataset(s));
		}

		public void setKurtosis(int axis, Dataset k) {
			kmap.put(axis, new ReferencedDataset(k));
		}
	}

	// Calculate whole-dataset skewness and kurtosis using Commons Math
	// accumulators.
	// NOTE(review): ignoreInfs is accepted but never applied in this method -
	// infinities are not skipped even when requested; confirm whether intended.
	static private HigherStatisticsImpl<?> calculateHigherMoments(final Dataset a, final boolean ignoreNaNs, final boolean ignoreInfs) {
		final int is = a.getElementsPerItem();
		final IndexIterator iter = a.getIterator();

		if (is == 1) {
			Skewness s = new Skewness();
			Kurtosis k = new Kurtosis();
			if (ignoreNaNs) {
				while (iter.hasNext()) {
					final double x = a.getElementDoubleAbs(iter.index);
					if (Double.isNaN(x))
						continue;
					s.increment(x);
					k.increment(x);
				}
			} else {
				while (iter.hasNext()) {
					final double x = a.getElementDoubleAbs(iter.index);
					s.increment(x);
					k.increment(x);
				}
			}

			HigherStatisticsImpl<Double> stats = new HigherStatisticsImpl<Double>();
			stats.skewness = s.getResult();
			stats.kurtosis = k.getResult();
			return stats;
		}
		// compound dataset: one accumulator pair per element
		final Skewness[] s = new Skewness[is];
		final Kurtosis[] k = new Kurtosis[is];

		for (int j = 0; j < is; j++) {
			s[j] = new Skewness();
			k[j] = new Kurtosis();
		}
		if (ignoreNaNs) {
			while (iter.hasNext()) {
				// skip whole item if any of its elements is NaN
				boolean skip = false;
				for (int j = 0; j < is; j++) {
					if (Double.isNaN(a.getElementDoubleAbs(iter.index + j))) {
						skip = true;
						break;
					}
				}
				if (!skip)
					for (int j = 0; j < is; j++) {
						final double val = a.getElementDoubleAbs(iter.index + j);
						s[j].increment(val);
k[j].increment(val); 591 } 592 } 593 } else { 594 while (iter.hasNext()) { 595 for (int j = 0; j < is; j++) { 596 final double val = a.getElementDoubleAbs(iter.index + j); 597 s[j].increment(val); 598 k[j].increment(val); 599 } 600 } 601 } 602 final double[] ts = new double[is]; 603 final double[] tk = new double[is]; 604 for (int j = 0; j < is; j++) { 605 ts[j] = s[j].getResult(); 606 tk[j] = k[j].getResult(); 607 } 608 609 HigherStatisticsImpl<double[]> stats = new HigherStatisticsImpl<double[]>(); 610 stats.skewness = ts; 611 stats.kurtosis = tk; 612 return stats; 613 } 614 615 static private HigherStatisticsImpl<?> calculateHigherMoments(final Dataset a, final boolean ignoreNaNs, final boolean ignoreInfs, final int axis) { 616 final int rank = a.getRank(); 617 final int is = a.getElementsPerItem(); 618 final int[] oshape = a.getShape(); 619 final int alen = oshape[axis]; 620 oshape[axis] = 1; 621 622 final int[] nshape = ShapeUtils.squeezeShape(oshape, false); 623 final Dataset sk; 624 final Dataset ku; 625 HigherStatisticsImpl<?> stats = null; 626 627 if (is == 1) { 628 if (stats == null) { 629 stats = new HigherStatisticsImpl<Double>(); 630 a.setMetadata(stats); 631 } 632 sk = DatasetFactory.zeros(DoubleDataset.class, nshape); 633 ku = DatasetFactory.zeros(DoubleDataset.class, nshape); 634 final IndexIterator qiter = sk.getIterator(true); 635 final int[] qpos = qiter.getPos(); 636 final int[] spos = oshape; 637 638 while (qiter.hasNext()) { 639 int i = 0; 640 for (; i < axis; i++) { 641 spos[i] = qpos[i]; 642 } 643 spos[i++] = 0; 644 for (; i < rank; i++) { 645 spos[i] = qpos[i - 1]; 646 } 647 648 Skewness s = new Skewness(); 649 Kurtosis k = new Kurtosis(); 650 if (ignoreNaNs) { 651 for (int j = 0; j < alen; j++) { 652 spos[axis] = j; 653 final double val = a.getDouble(spos); 654 if (Double.isNaN(val)) 655 continue; 656 657 s.increment(val); 658 k.increment(val); 659 } 660 } else { 661 for (int j = 0; j < alen; j++) { 662 spos[axis] = j; 663 final double val 
= a.getDouble(spos); 664 s.increment(val); 665 k.increment(val); 666 } 667 } 668 sk.set(s.getResult(), qpos); 669 ku.set(k.getResult(), qpos); 670 } 671 } else { 672 if (stats == null) { 673 stats = new HigherStatisticsImpl<double[]>(); 674 a.setMetadata(stats); 675 } 676 sk = DatasetFactory.zeros(is, CompoundDoubleDataset.class, nshape); 677 ku = DatasetFactory.zeros(is, CompoundDoubleDataset.class, nshape); 678 final IndexIterator qiter = sk.getIterator(true); 679 final int[] qpos = qiter.getPos(); 680 final int[] spos = oshape; 681 final Skewness[] s = new Skewness[is]; 682 final Kurtosis[] k = new Kurtosis[is]; 683 final double[] ts = new double[is]; 684 final double[] tk = new double[is]; 685 686 for (int j = 0; j < is; j++) { 687 s[j] = new Skewness(); 688 k[j] = new Kurtosis(); 689 } 690 while (qiter.hasNext()) { 691 int i = 0; 692 for (; i < axis; i++) { 693 spos[i] = qpos[i]; 694 } 695 spos[i++] = 0; 696 for (; i < rank; i++) { 697 spos[i] = qpos[i-1]; 698 } 699 700 for (int j = 0; j < is; j++) { 701 s[j].clear(); 702 k[j].clear(); 703 } 704 int index = a.get1DIndex(spos); 705 if (ignoreNaNs) { 706 boolean skip = false; 707 for (int j = 0; j < is; j++) { 708 if (Double.isNaN(a.getElementDoubleAbs(index + j))) { 709 skip = true; 710 break; 711 } 712 } 713 if (!skip) 714 for (int j = 0; j < is; j++) { 715 final double val = a.getElementDoubleAbs(index + j); 716 s[j].increment(val); 717 k[j].increment(val); 718 } 719 } else { 720 for (int j = 0; j < is; j++) { 721 final double val = a.getElementDoubleAbs(index + j); 722 s[j].increment(val); 723 k[j].increment(val); 724 } 725 } 726 for (int j = 0; j < is; j++) { 727 ts[j] = s[j].getResult(); 728 tk[j] = k[j].getResult(); 729 } 730 sk.set(ts, qpos); 731 ku.set(tk, qpos); 732 } 733 } 734 735 stats.setSkewness(axis, sk); 736 stats.setKurtosis(axis, ku); 737 return stats; 738 } 739 740 /** 741 * @param a dataset 742 * @param ignoreInvalids see {@link IDataset#max(boolean...)} 743 * @return skewness 744 * @since 
2.0
	 */
	public static Object skewness(final Dataset a, final boolean... ignoreInvalids) {
		return getHigherStatistic(a, ignoreInvalids).skewness;
	}

	/**
	 * @param a dataset
	 * @param ignoreInvalids see {@link IDataset#max(boolean...)}
	 * @return kurtosis
	 * @since 2.0
	 */
	public static Object kurtosis(final Dataset a, final boolean... ignoreInvalids) {
		return getHigherStatistic(a, ignoreInvalids).kurtosis;
	}

	/**
	 * @param a dataset
	 * @param axis
	 * @param ignoreInvalids see {@link Dataset#max(int, boolean...)}
	 * @return skewness along axis in dataset
	 * @since 2.0
	 */
	public static Dataset skewness(final Dataset a, int axis, final boolean... ignoreInvalids) {
		axis = a.checkAxis(axis);
		return getHigherStatistic(a, ignoreInvalids, axis).getSkewness(axis);
	}

	/**
	 * @param a dataset
	 * @param axis
	 * @param ignoreInvalids see {@link Dataset#max(int, boolean...)}
	 * @return kurtosis along axis in dataset
	 * @since 2.0
	 */
	public static Dataset kurtosis(final Dataset a, int axis, final boolean... ignoreInvalids) {
		axis = a.checkAxis(axis);
		return getHigherStatistic(a, ignoreInvalids, axis).getKurtosis(axis);
	}

	/**
	 * @param a dataset
	 * @param ignoreInvalids see {@link IDataset#max(boolean...)}
	 * @return sum of dataset
	 * @since 2.0
	 */
	public static Object sum(final Dataset a, final boolean... ignoreInvalids) {
		return a.sum(ignoreInvalids);
	}

	/**
	 * @param a dataset
	 * @param dtype
	 * @param ignoreInvalids see {@link IDataset#max(boolean...)}
	 * @return typed sum of dataset
	 * @since 2.0
	 */
	public static Object typedSum(final Dataset a, int dtype, final boolean... ignoreInvalids) {
		if (a.isComplex()) {
			// complex sums stay complex regardless of requested dtype
			Complex m = (Complex) a.sum(ignoreInvalids);
			return m;
		} else if (a instanceof CompoundDataset) {
			return DTypeUtils.fromDoublesToBiggestPrimitives((double[]) a.sum(ignoreInvalids), dtype);
		}
		return DTypeUtils.fromDoubleToBiggestNumber(DTypeUtils.toReal(a.sum(ignoreInvalids)), dtype);
	}

	/**
	 * @param a dataset
	 * @param dtype
	 * @param axis
	 * @param ignoreInvalids see {@link Dataset#max(int, boolean...)}
	 * @return typed sum of items along axis in dataset
	 * @since 2.0
	 */
	public static Dataset typedSum(final Dataset a, int dtype, int axis, final boolean... ignoreInvalids) {
		return DatasetUtils.cast(a.sum(axis, ignoreInvalids), dtype);
	}

	/**
	 * @param a dataset
	 * @param ignoreInvalids see {@link IDataset#max(boolean...)}
	 * @return product of all items in dataset
	 * @since 2.0
	 */
	public static Object product(final Dataset a, final boolean... ignoreInvalids) {
		return typedProduct(a, a.getDType(), ignoreInvalids);
	}

	/**
	 * @param a dataset
	 * @param axis
	 * @param ignoreInvalids see {@link Dataset#max(int, boolean...)}
	 * @return product of items along axis in dataset
	 * @since 2.0
	 */
	public static Dataset product(final Dataset a, final int axis, final boolean... ignoreInvalids) {
		return typedProduct(a, a.getDType(), axis, ignoreInvalids);
	}

	/**
	 * @param a dataset
	 * @param dtype
	 * @param ignoreInvalids see {@link IDataset#max(boolean...)}
	 * @return typed product of all items in dataset
	 * @since 2.0
	 */
	public static Object typedProduct(final Dataset a, final int dtype, final boolean... ignoreInvalids) {
		// resolve invalid-value flags: second flag defaults to the first
		final boolean ignoreNaNs;
		final boolean ignoreInfs;
		if (a.hasFloatingPointElements()) {
			ignoreNaNs = ignoreInvalids != null && ignoreInvalids.length > 0 ? ignoreInvalids[0] : false;
			ignoreInfs = ignoreInvalids != null && ignoreInvalids.length > 1 ? ignoreInvalids[1] : ignoreNaNs;
		} else {
			ignoreNaNs = false;
			ignoreInfs = false;
		}

		if (a.isComplex()) {
			IndexIterator it = a.getIterator();
			double rv = 1, iv = 0; // running real and imaginary parts

			while (it.hasNext()) {
				final double r1 = a.getElementDoubleAbs(it.index);
				final double i1 = a.getElementDoubleAbs(it.index + 1);
				if (ignoreNaNs && (Double.isNaN(r1) || Double.isNaN(i1))) {
					continue;
				}
				if (ignoreInfs && (Double.isInfinite(r1) || Double.isInfinite(i1))) {
					continue;
				}
				// complex multiply: (rv + i iv) * (r1 + i i1)
				final double tv = r1*rv - i1*iv;
				iv = r1*iv + i1*rv;
				rv = tv;
				if (Double.isNaN(rv) && Double.isNaN(iv)) {
					break; // product can no longer change
				}
			}

			return new Complex(rv, iv);
		}

		IndexIterator it = a.getIterator();
		final int is;
		final long[] lresults;
		final double[] dresults;

		switch (dtype) {
		case Dataset.BOOL:
		case Dataset.INT8: case Dataset.INT16:
		case Dataset.INT32: case Dataset.INT64:
			long lresult = 1;
			while (it.hasNext()) {
				lresult *= a.getElementLongAbs(it.index);
			}
			return new Long(lresult);
		case Dataset.ARRAYINT8: case Dataset.ARRAYINT16:
		case Dataset.ARRAYINT32: case Dataset.ARRAYINT64:
			lresult = 1;
			is = a.getElementsPerItem();
			lresults = new long[is];
			for (int j = 0; j < is; j++) {
				lresults[j] = 1;
			}
			while (it.hasNext()) {
				for (int j = 0; j < is; j++)
					lresults[j] *= a.getElementLongAbs(it.index+j);
			}
			return lresults;
		case Dataset.FLOAT32: case Dataset.FLOAT64:
			double dresult = 1.;
			while (it.hasNext()) {
				final double x = a.getElementDoubleAbs(it.index);
				if (ignoreNaNs && Double.isNaN(x)) {
					continue;
				}
				if (ignoreInfs && Double.isInfinite(x)) {
					continue;
				}
				dresult *= x;
				if (Double.isNaN(dresult)) {
					break; // product can no longer change
				}
			}
			return Double.valueOf(dresult);
		case Dataset.ARRAYFLOAT32:
case Dataset.ARRAYFLOAT64:
			is = a.getElementsPerItem();
			double[] vals = new double[is];
			dresults = new double[is];
			for (int j = 0; j < is; j++) {
				dresults[j] = 1.;
			}
			while (it.hasNext()) {
				// item is skipped as a whole if any element is an ignored NaN/infinity
				boolean okay = true;
				for (int j = 0; j < is; j++) {
					final double val = a.getElementDoubleAbs(it.index + j);
					if (ignoreNaNs && Double.isNaN(val)) {
						okay = false;
						break;
					}
					if (ignoreInfs && Double.isInfinite(val)) {
						okay = false;
						break;
					}
					vals[j] = val;
				}
				if (okay) {
					for (int j = 0; j < is; j++) {
						double val = vals[j];
						dresults[j] *= val;
					}
				}
			}
			return dresults;
		}

		return null; // unsupported dtype
	}

	/**
	 * @param a dataset
	 * @param dtype
	 * @param axis
	 * @param ignoreInvalids see {@link IDataset#max(boolean...)}
	 * @return typed product of items along axis in dataset
	 * @since 2.0
	 */
	public static Dataset typedProduct(final Dataset a, final int dtype, int axis, final boolean... ignoreInvalids) {
		axis = a.checkAxis(axis);
		final int[] oshape = a.getShape();
		final int is = a.getElementsPerItem();
		final int alen = oshape[axis];
		oshape[axis] = 1;

		// resolve invalid-value flags: second flag defaults to the first
		final boolean ignoreNaNs;
		final boolean ignoreInfs;
		if (a.hasFloatingPointElements()) {
			ignoreNaNs = ignoreInvalids != null && ignoreInvalids.length > 0 ? ignoreInvalids[0] : false;
			ignoreInfs = ignoreInvalids != null && ignoreInvalids.length > 1 ? ignoreInvalids[1] : ignoreNaNs;
		} else {
			ignoreNaNs = false;
			ignoreInfs = false;
		}
		@SuppressWarnings("deprecation")
		Dataset result = DatasetFactory.zeros(is, oshape, dtype);

		IndexIterator qiter = result.getIterator(true);
		int[] qpos = qiter.getPos();
		int[] spos;

		// TODO add getLongArray(long[], int...) to CompoundDataset
		while (qiter.hasNext()) {
			spos = qpos.clone(); // source position; spos[axis] is varied below

			if (a.isComplex()) {
				double rv = 1, iv = 0; // running real and imaginary parts
				switch (dtype) {
				case Dataset.COMPLEX64:
					ComplexFloatDataset af = (ComplexFloatDataset) a;
					for (int j = 0; j < alen; j++) {
						spos[axis] = j;
						final float r1 = af.getReal(spos);
						final float i1 = af.getImag(spos);
						if (ignoreNaNs && (Float.isNaN(r1) || Float.isNaN(i1))) {
							continue;
						}
						if (ignoreInfs && (Float.isInfinite(r1) || Float.isInfinite(i1))) {
							continue;
						}
						// complex multiply: (rv + i iv) * (r1 + i i1)
						final double tv = r1*rv - i1*iv;
						iv = r1*iv + i1*rv;
						rv = tv;
						if (Double.isNaN(rv) && Double.isNaN(iv)) {
							break; // product can no longer change
						}
					}
					break;
				case Dataset.COMPLEX128:
					ComplexDoubleDataset ad = (ComplexDoubleDataset) a;
					for (int j = 0; j < alen; j++) {
						spos[axis] = j;
						final double r1 = ad.getReal(spos);
						final double i1 = ad.getImag(spos);
						if (ignoreNaNs && (Double.isNaN(r1) || Double.isNaN(i1))) {
							continue;
						}
						if (ignoreInfs && (Double.isInfinite(r1) || Double.isInfinite(i1))) {
							continue;
						}
						final double tv = r1*rv - i1*iv;
						iv = r1*iv + i1*rv;
						rv = tv;
						if (Double.isNaN(rv) && Double.isNaN(iv)) {
							break;
						}
					}
					break;
				}

				result.set(new Complex(rv, iv), qpos);
			} else {
				final long[] lresults;
				final double[] dresults;

				switch (dtype) {
				case Dataset.BOOL:
				case Dataset.INT8: case Dataset.INT16:
				case Dataset.INT32: case Dataset.INT64:
					long lresult = 1;
					for (int j = 0; j < alen; j++) {
						spos[axis] = j;
						lresult *= a.getInt(spos);
					}
					result.set(lresult, qpos);
					break;
				case Dataset.ARRAYINT8:
					lresults = new long[is];
					for (int k = 0; k < is; k++) {
						lresults[k] = 1;
					}
					for (int j = 0; j < alen; j++) {
						spos[axis] = j;
						final byte[] va = (byte[]) a.getObject(spos);
						for (int k = 0; k < is; k++) {
							lresults[k] *= va[k];
						}
					}
					result.set(lresults, qpos);
					break;
				case Dataset.ARRAYINT16:
					lresults = new long[is];
					for (int k = 0; k < is; k++) {
						lresults[k] = 1;
					}
					for (int j = 0; j < alen; j++) {
						spos[axis] = j;
						final short[] va = (short[]) a.getObject(spos);
						for (int k = 0; k < is; k++) {
							lresults[k] *= va[k];
						}
					}
					result.set(lresults, qpos);
					break;
				case Dataset.ARRAYINT32:
					lresults = new long[is];
					for (int k = 0; k < is; k++) {
						lresults[k] = 1;
					}
					for (int j = 0; j < alen; j++) {
						spos[axis] = j;
						final int[] va = (int[]) a.getObject(spos);
						for (int k = 0; k < is; k++) {
							lresults[k] *= va[k];
						}
					}
					result.set(lresults, qpos);
					break;
				case Dataset.ARRAYINT64:
					lresults = new long[is];
					for (int k = 0; k < is; k++) {
						lresults[k] = 1;
					}
					for (int j = 0; j < alen; j++) {
						spos[axis] = j;
						final long[] va = (long[]) a.getObject(spos);
						for (int k = 0; k < is; k++) {
							lresults[k] *= va[k];
						}
					}
					result.set(lresults, qpos);
					break;
				case Dataset.FLOAT32: case Dataset.FLOAT64:
					double dresult = 1.;
					for (int j = 0; j < alen; j++) {
						spos[axis] = j;
						final double x = a.getDouble(spos);
						if (ignoreNaNs && Double.isNaN(x)) {
							continue;
						}
						if (ignoreInfs && Double.isInfinite(x)) {
							continue;
						}
						dresult *= x;
						if (Double.isNaN(dresult)) {
							break; // product can no longer change
						}
					}
					result.set(dresult, qpos);
					break;
				case Dataset.ARRAYFLOAT32: case Dataset.ARRAYFLOAT64:
					CompoundDataset da = (CompoundDataset) a;
					double[] dvalues = new double[is];
					dresults = new double[is];
					for (int k = 0; k < is; k++) {
						dresults[k] = 1.;
					}
					for (int j = 0; j < alen; j++) {
						spos[axis] = j;
						da.getDoubleArray(dvalues, spos);
						// item is skipped as a whole if any element is an ignored NaN/infinity
						boolean okay = true;
						for (int k = 0; k < is; k++) {
							final double val = dvalues[k];
							if (ignoreNaNs && Double.isNaN(val)) {
								okay = false;
								break;
							}
							if (ignoreInfs && Double.isInfinite(val)) {
								okay = false;
								break;
							}
						}
						if (okay) {
							for (int k = 0; k < is; k++) {
								dresults[k] *= dvalues[k];
							}
						}
					}
					result.set(dresults, qpos);
					break;
				}
			}
		}

		result.setShape(ShapeUtils.squeezeShape(oshape, axis));
		return result;
	}

	/**
	 * @param a dataset
	 * @param ignoreInvalids see {@link IDataset#max(boolean...)}
	 * @return cumulative product of items in flattened dataset
	 * @since 2.0
	 */
	public static Dataset cumulativeProduct(final Dataset a, final boolean... ignoreInvalids) {
		return cumulativeProduct(a.flatten(), 0, ignoreInvalids);
	}

	/**
	 * @param a dataset
	 * @param axis
	 * @param ignoreInvalids see {@link Dataset#max(int, boolean...)}
	 * @return cumulative product of items along axis in dataset
	 * @since 2.0
	 */
	public static Dataset cumulativeProduct(final Dataset a, int axis, final boolean... ignoreInvalids) {
		axis = a.checkAxis(axis);
		int dtype = a.getDType();
		int[] oshape = a.getShape();
		int alen = oshape[axis];
		oshape[axis] = 1;

		// resolve invalid-value flags: second flag defaults to the first
		final boolean ignoreNaNs;
		final boolean ignoreInfs;
		if (a.hasFloatingPointElements()) {
			ignoreNaNs = ignoreInvalids != null && ignoreInvalids.length > 0 ? ignoreInvalids[0] : false;
			ignoreInfs = ignoreInvalids != null && ignoreInvalids.length > 1 ?
ignoreInvalids[1] : ignoreNaNs; 1201 } else { 1202 ignoreNaNs = false; 1203 ignoreInfs = false; 1204 } 1205 Dataset result = DatasetFactory.zeros(a); 1206 PositionIterator pi = result.getPositionIterator(axis); 1207 1208 int[] pos = pi.getPos(); 1209 1210 while (pi.hasNext()) { 1211 1212 if (a.isComplex()) { 1213 double rv = 1, iv = 0; 1214 switch (dtype) { 1215 case Dataset.COMPLEX64: 1216 ComplexFloatDataset af = (ComplexFloatDataset) a; 1217 ComplexFloatDataset rf = (ComplexFloatDataset) result; 1218 for (int j = 0; j < alen; j++) { 1219 if (!Double.isNaN(rv) || !Double.isNaN(iv)) { 1220 pos[axis] = j; 1221 final float r1 = af.getReal(pos); 1222 final float i1 = af.getImag(pos); 1223 if (ignoreNaNs && (Float.isNaN(r1) || Float.isNaN(i1))) { 1224 continue; 1225 } 1226 if (ignoreInfs && (Float.isInfinite(r1) || Float.isInfinite(i1))) { 1227 continue; 1228 } 1229 final double tv = r1*rv - i1*iv; 1230 iv = r1*iv + i1*rv; 1231 rv = tv; 1232 } 1233 rf.set((float) rv, (float) iv, pos); 1234 } 1235 break; 1236 case Dataset.COMPLEX128: 1237 ComplexDoubleDataset ad = (ComplexDoubleDataset) a; 1238 ComplexDoubleDataset rd = (ComplexDoubleDataset) result; 1239 for (int j = 0; j < alen; j++) { 1240 if (!Double.isNaN(rv) || !Double.isNaN(iv)) { 1241 pos[axis] = j; 1242 final double r1 = ad.getReal(pos); 1243 final double i1 = ad.getImag(pos); 1244 if (ignoreNaNs && (Double.isNaN(r1) || Double.isNaN(i1))) { 1245 continue; 1246 } 1247 if (ignoreInfs && (Double.isInfinite(r1) || Double.isInfinite(i1))) { 1248 continue; 1249 } 1250 final double tv = r1*rv - i1*iv; 1251 iv = r1*iv + i1*rv; 1252 rv = tv; 1253 } 1254 rd.set(rv, iv, pos); 1255 } 1256 break; 1257 } 1258 } else { 1259 final int is; 1260 final long[] lresults; 1261 final double[] dresults; 1262 1263 switch (dtype) { 1264 case Dataset.BOOL: 1265 case Dataset.INT8: case Dataset.INT16: 1266 case Dataset.INT32: case Dataset.INT64: 1267 long lresult = 1; 1268 for (int j = 0; j < alen; j++) { 1269 pos[axis] = j; 1270 lresult 
*= a.getInt(pos); 1271 result.set(lresult, pos); 1272 } 1273 break; 1274 case Dataset.ARRAYINT8: 1275 is = a.getElementsPerItem(); 1276 lresults = new long[is]; 1277 for (int k = 0; k < is; k++) { 1278 lresults[k] = 1; 1279 } 1280 for (int j = 0; j < alen; j++) { 1281 pos[axis] = j; 1282 final byte[] va = (byte[]) a.getObject(pos); 1283 for (int k = 0; k < is; k++) { 1284 lresults[k] *= va[k]; 1285 } 1286 result.set(lresults, pos); 1287 } 1288 break; 1289 case Dataset.ARRAYINT16: 1290 is = a.getElementsPerItem(); 1291 lresults = new long[is]; 1292 for (int k = 0; k < is; k++) { 1293 lresults[k] = 1; 1294 } 1295 for (int j = 0; j < alen; j++) { 1296 pos[axis] = j; 1297 final short[] va = (short[]) a.getObject(pos); 1298 for (int k = 0; k < is; k++) { 1299 lresults[k] *= va[k]; 1300 } 1301 result.set(lresults, pos); 1302 } 1303 break; 1304 case Dataset.ARRAYINT32: 1305 is = a.getElementsPerItem(); 1306 lresults = new long[is]; 1307 for (int k = 0; k < is; k++) { 1308 lresults[k] = 1; 1309 } 1310 for (int j = 0; j < alen; j++) { 1311 pos[axis] = j; 1312 final int[] va = (int[]) a.getObject(pos); 1313 for (int k = 0; k < is; k++) { 1314 lresults[k] *= va[k]; 1315 } 1316 result.set(lresults, pos); 1317 } 1318 break; 1319 case Dataset.ARRAYINT64: 1320 is = a.getElementsPerItem(); 1321 lresults = new long[is]; 1322 for (int k = 0; k < is; k++) { 1323 lresults[k] = 1; 1324 } 1325 for (int j = 0; j < alen; j++) { 1326 pos[axis] = j; 1327 final long[] va = (long[]) a.getObject(pos); 1328 for (int k = 0; k < is; k++) { 1329 lresults[k] *= va[k]; 1330 } 1331 result.set(lresults, pos); 1332 } 1333 break; 1334 case Dataset.FLOAT32: case Dataset.FLOAT64: 1335 double dresult = 1.; 1336 for (int j = 0; j < alen; j++) { 1337 if (!Double.isNaN(dresult)) { 1338 pos[axis] = j; 1339 final double x = a.getDouble(pos); 1340 if (ignoreNaNs && Double.isNaN(x)) { 1341 continue; 1342 } 1343 if (ignoreInfs && Double.isInfinite(x)) { 1344 continue; 1345 } 1346 dresult *= x; 1347 } 1348 
result.set(dresult, pos); 1349 } 1350 break; 1351 case Dataset.ARRAYFLOAT32: case Dataset.ARRAYFLOAT64: 1352 is = a.getElementsPerItem(); 1353 CompoundDataset da = (CompoundDataset) a; 1354 double[] dvalues = new double[is]; 1355 dresults = new double[is]; 1356 for (int k = 0; k < is; k++) { 1357 dresults[k] = 1.; 1358 } 1359 for (int j = 0; j < alen; j++) { 1360 pos[axis] = j; 1361 da.getDoubleArray(dvalues, pos); 1362 boolean okay = true; 1363 for (int k = 0; k < is; k++) { 1364 final double val = dvalues[k]; 1365 if (ignoreNaNs && Double.isNaN(val)) { 1366 okay = false; 1367 break; 1368 } 1369 if (ignoreInfs && Double.isInfinite(val)) { 1370 okay = false; 1371 break; 1372 } 1373 } 1374 if (okay) { 1375 for (int k = 0; k < is; k++) { 1376 dresults[k] *= dvalues[k]; 1377 } 1378 } 1379 result.set(dresults, pos); 1380 } 1381 break; 1382 } 1383 } 1384 } 1385 1386 return result; 1387 } 1388 1389 /** 1390 * @param a dataset 1391 * @param ignoreInvalids see {@link IDataset#max(boolean...)} 1392 * @return cumulative sum of items in flattened dataset 1393 * @since 2.0 1394 */ 1395 public static Dataset cumulativeSum(final Dataset a, final boolean... ignoreInvalids) { 1396 return cumulativeSum(a.flatten(), 0, ignoreInvalids); 1397 } 1398 1399 /** 1400 * @param a dataset 1401 * @param axis 1402 * @param ignoreInvalids see {@link Dataset#max(int, boolean...)} 1403 * @return cumulative sum of items along axis in dataset 1404 * @since 2.0 1405 */ 1406 public static Dataset cumulativeSum(final Dataset a, int axis, final boolean... ignoreInvalids) { 1407 axis = a.checkAxis(axis); 1408 int dtype = a.getDType(); 1409 int[] oshape = a.getShape(); 1410 int alen = oshape[axis]; 1411 oshape[axis] = 1; 1412 1413 final boolean ignoreNaNs; 1414 final boolean ignoreInfs; 1415 if (a.hasFloatingPointElements()) { 1416 ignoreNaNs = ignoreInvalids != null && ignoreInvalids.length > 0 ? ignoreInvalids[0] : false; 1417 ignoreInfs = ignoreInvalids != null && ignoreInvalids.length > 1 ? 
ignoreInvalids[1] : ignoreNaNs; 1418 } else { 1419 ignoreNaNs = false; 1420 ignoreInfs = false; 1421 } 1422 Dataset result = DatasetFactory.zeros(a); 1423 PositionIterator pi = result.getPositionIterator(axis); 1424 1425 int[] pos = pi.getPos(); 1426 1427 while (pi.hasNext()) { 1428 1429 if (a.isComplex()) { 1430 double rv = 0, iv = 0; 1431 switch (dtype) { 1432 case Dataset.COMPLEX64: 1433 ComplexFloatDataset af = (ComplexFloatDataset) a; 1434 ComplexFloatDataset rf = (ComplexFloatDataset) result; 1435 for (int j = 0; j < alen; j++) { 1436 if (!Double.isNaN(rv) || !Double.isNaN(iv)) { 1437 pos[axis] = j; 1438 final float r1 = af.getReal(pos); 1439 final float i1 = af.getImag(pos); 1440 if (ignoreNaNs && (Float.isNaN(r1) || Float.isNaN(i1))) { 1441 continue; 1442 } 1443 if (ignoreInfs && (Float.isInfinite(r1) || Float.isInfinite(i1))) { 1444 continue; 1445 } 1446 rv += r1; 1447 iv += i1; 1448 } 1449 rf.set((float) rv, (float) iv, pos); 1450 } 1451 break; 1452 case Dataset.COMPLEX128: 1453 ComplexDoubleDataset ad = (ComplexDoubleDataset) a; 1454 ComplexDoubleDataset rd = (ComplexDoubleDataset) result; 1455 for (int j = 0; j < alen; j++) { 1456 if (!Double.isNaN(rv) || !Double.isNaN(iv)) { 1457 pos[axis] = j; 1458 final double r1 = ad.getReal(pos); 1459 final double i1 = ad.getImag(pos); 1460 if (ignoreNaNs && (Double.isNaN(r1) || Double.isNaN(i1))) { 1461 continue; 1462 } 1463 if (ignoreInfs && (Double.isInfinite(r1) || Double.isInfinite(i1))) { 1464 continue; 1465 } 1466 rv += r1; 1467 iv += i1; 1468 } 1469 rd.set(rv, iv, pos); 1470 } 1471 break; 1472 } 1473 } else { 1474 final int is; 1475 final long[] lresults; 1476 final double[] dresults; 1477 1478 switch (dtype) { 1479 case Dataset.BOOL: 1480 case Dataset.INT8: case Dataset.INT16: 1481 case Dataset.INT32: case Dataset.INT64: 1482 long lresult = 0; 1483 for (int j = 0; j < alen; j++) { 1484 pos[axis] = j; 1485 lresult += a.getInt(pos); 1486 result.set(lresult, pos); 1487 } 1488 break; 1489 case 
Dataset.ARRAYINT8: 1490 is = a.getElementsPerItem(); 1491 lresults = new long[is]; 1492 for (int j = 0; j < alen; j++) { 1493 pos[axis] = j; 1494 final byte[] va = (byte[]) a.getObject(pos); 1495 for (int k = 0; k < is; k++) { 1496 lresults[k] += va[k]; 1497 } 1498 result.set(lresults, pos); 1499 } 1500 break; 1501 case Dataset.ARRAYINT16: 1502 is = a.getElementsPerItem(); 1503 lresults = new long[is]; 1504 for (int j = 0; j < alen; j++) { 1505 pos[axis] = j; 1506 final short[] va = (short[]) a.getObject(pos); 1507 for (int k = 0; k < is; k++) { 1508 lresults[k] += va[k]; 1509 } 1510 result.set(lresults, pos); 1511 } 1512 break; 1513 case Dataset.ARRAYINT32: 1514 is = a.getElementsPerItem(); 1515 lresults = new long[is]; 1516 for (int j = 0; j < alen; j++) { 1517 pos[axis] = j; 1518 final int[] va = (int[]) a.getObject(pos); 1519 for (int k = 0; k < is; k++) { 1520 lresults[k] += va[k]; 1521 } 1522 result.set(lresults, pos); 1523 } 1524 break; 1525 case Dataset.ARRAYINT64: 1526 is = a.getElementsPerItem(); 1527 lresults = new long[is]; 1528 for (int j = 0; j < alen; j++) { 1529 pos[axis] = j; 1530 final long[] va = (long[]) a.getObject(pos); 1531 for (int k = 0; k < is; k++) { 1532 lresults[k] += va[k]; 1533 } 1534 result.set(lresults, pos); 1535 } 1536 break; 1537 case Dataset.FLOAT32: case Dataset.FLOAT64: 1538 double dresult = 0.; 1539 for (int j = 0; j < alen; j++) { 1540 if (!Double.isNaN(dresult)) { 1541 pos[axis] = j; 1542 final double x = a.getDouble(pos); 1543 if (ignoreNaNs && Double.isNaN(x)) { 1544 continue; 1545 } 1546 if (ignoreInfs && Double.isInfinite(x)) { 1547 continue; 1548 } 1549 dresult += x; 1550 } 1551 result.set(dresult, pos); 1552 } 1553 break; 1554 case Dataset.ARRAYFLOAT32: case Dataset.ARRAYFLOAT64: 1555 is = a.getElementsPerItem(); 1556 CompoundDataset da = (CompoundDataset) a; 1557 double[] dvalues = new double[is]; 1558 dresults = new double[is]; 1559 for (int j = 0; j < alen; j++) { 1560 pos[axis] = j; 1561 da.getDoubleArray(dvalues, 
pos); 1562 boolean okay = true; 1563 for (int k = 0; k < is; k++) { 1564 final double val = dvalues[k]; 1565 if (ignoreNaNs && Double.isNaN(val)) { 1566 okay = false; 1567 break; 1568 } 1569 if (ignoreInfs && Double.isInfinite(val)) { 1570 okay = false; 1571 break; 1572 } 1573 } 1574 if (okay) { 1575 for (int k = 0; k < is; k++) { 1576 dresults[k] += dvalues[k]; 1577 } 1578 } 1579 result.set(dresults, pos); 1580 } 1581 break; 1582 } 1583 } 1584 } 1585 1586 return result; 1587 } 1588 1589 /** 1590 * @param a 1591 * @return average deviation value of all items the dataset 1592 */ 1593 public static Object averageDeviation(final Dataset a) { 1594 final IndexIterator it = a.getIterator(); 1595 final int is = a.getElementsPerItem(); 1596 1597 if (is == 1) { 1598 double mean = (Double) a.mean(); 1599 double sum = 0.0; 1600 1601 while (it.hasNext()) { 1602 sum += Math.abs(a.getElementDoubleAbs(it.index) - mean); 1603 } 1604 1605 return sum / a.getSize(); 1606 } 1607 1608 double[] means = (double[]) a.mean(); 1609 double[] sums = new double[is]; 1610 1611 while (it.hasNext()) { 1612 for (int j = 0; j < is; j++) 1613 sums[j] += Math.abs(a.getElementDoubleAbs(it.index + j) - means[j]); 1614 } 1615 1616 double n = a.getSize(); 1617 for (int j = 0; j < is; j++) 1618 sums[j] /= n; 1619 1620 return sums; 1621 } 1622 1623 /** 1624 * The residual is the sum of squared differences 1625 * @param a 1626 * @param b 1627 * @return residual value 1628 */ 1629 public static double residual(final Dataset a, final Dataset b) { 1630 return a.residual(b); 1631 } 1632 1633 /** 1634 * The residual is the sum of squared differences 1635 * @param a 1636 * @param b 1637 * @param w 1638 * @return residual value 1639 */ 1640 public static double weightedResidual(final Dataset a, final Dataset b, final Dataset w) { 1641 return a.residual(b, w, false); 1642 } 1643 1644 /** 1645 * Calculate approximate outlier values. 
These are defined as the values in the dataset 1646 * that are approximately below and above the given thresholds - in terms of percentages 1647 * of dataset size. 1648 * <p> 1649 * It approximates by limiting the number of items (given by length) used internally by 1650 * data structures - the larger this is, the more accurate will those outlier values become. 1651 * The actual thresholds used are returned in the array. 1652 * <p> 1653 * Also, the low and high values will be made distinct if possible by adjusting the thresholds 1654 * @param a 1655 * @param lo percentage threshold for lower limit 1656 * @param hi percentage threshold for higher limit 1657 * @param length maximum number of items used internally, if negative, then unlimited 1658 * @return double array with low and high values, and low and high percentage thresholds 1659 */ 1660 public static double[] outlierValues(final Dataset a, double lo, double hi, final int length) { 1661 return outlierValues(a, null, true, lo, hi, length); 1662 } 1663 1664 /** 1665 * Calculate approximate outlier values. These are defined as the values in the dataset 1666 * that are approximately below and above the given thresholds - in terms of percentages 1667 * of dataset size. 1668 * <p> 1669 * It approximates by limiting the number of items (given by length) used internally by 1670 * data structures - the larger this is, the more accurate will those outlier values become. 1671 * The actual thresholds used are returned in the array. 
1672 * <p> 1673 * Also, the low and high values will be made distinct if possible by adjusting the thresholds 1674 * @param a 1675 * @param mask can be null 1676 * @param value value of mask to match to include for calculation 1677 * @param lo percentage threshold for lower limit 1678 * @param hi percentage threshold for higher limit 1679 * @param length maximum number of items used internally, if negative, then unlimited 1680 * @return double array with low and high values, and low and high percentage thresholds 1681 * @since 2.1 1682 */ 1683 public static double[] outlierValues(final Dataset a, final Dataset mask, final boolean value, double lo, double hi, final int length) { 1684 if (lo <= 0 || hi <= 0 || lo >= hi || hi >= 100 || Double.isNaN(lo)|| Double.isNaN(hi)) { 1685 throw new IllegalArgumentException("Thresholds must be between (0,100) and in order"); 1686 } 1687 final int size = a.getSize(); 1688 int nl = Math.max((int) ((lo*size)/100), 1); 1689 if (length > 0 && nl > length) 1690 nl = length; 1691 int nh = Math.max((int) (((100-hi)*size)/100), 1); 1692 if (length > 0 && nh > length) 1693 nh = length; 1694 1695 IndexIterator it = mask == null ? a.getIterator() : a.getBooleanIterator(mask, value); 1696 double[] results = Math.max(nl, nh) > 640 ? outlierValuesMap(a, it, nl, nh) : outlierValuesList(a, it, nl, nh); 1697 1698 results[2] = results[2]*100./size; 1699 results[3] = 100. 
- results[3]*100./size; 1700 return results; 1701 } 1702 1703 static double[] outlierValuesMap(final Dataset a, final IndexIterator it, int nl, int nh) { 1704 final TreeMap<Double, Integer> lMap = new TreeMap<Double, Integer>(); 1705 final TreeMap<Double, Integer> hMap = new TreeMap<Double, Integer>(); 1706 1707 int ml = 0; 1708 int mh = 0; 1709 while (it.hasNext()) { 1710 Double x = a.getElementDoubleAbs(it.index); 1711 if (Double.isNaN(x)) { 1712 continue; 1713 } 1714 Integer i; 1715 if (ml == nl) { 1716 Double k = lMap.lastKey(); 1717 if (x < k) { 1718 i = lMap.get(k) - 1; 1719 if (i == 0) { 1720 lMap.remove(k); 1721 } else { 1722 lMap.put(k, i); 1723 } 1724 i = lMap.get(x); 1725 if (i == null) { 1726 lMap.put(x, 1); 1727 } else { 1728 lMap.put(x, i + 1); 1729 } 1730 } 1731 } else { 1732 i = lMap.get(x); 1733 if (i == null) { 1734 lMap.put(x, 1); 1735 } else { 1736 lMap.put(x, i + 1); 1737 } 1738 ml++; 1739 } 1740 1741 if (mh == nh) { 1742 Double k = hMap.firstKey(); 1743 if (x > k) { 1744 i = hMap.get(k) - 1; 1745 if (i == 0) { 1746 hMap.remove(k); 1747 } else { 1748 hMap.put(k, i); 1749 } 1750 i = hMap.get(x); 1751 if (i == null) { 1752 hMap.put(x, 1); 1753 } else { 1754 hMap.put(x, i+1); 1755 } 1756 } 1757 } else { 1758 i = hMap.get(x); 1759 if (i == null) { 1760 hMap.put(x, 1); 1761 } else { 1762 hMap.put(x, i+1); 1763 } 1764 mh++; 1765 } 1766 } 1767 1768 // Attempt to make values distinct 1769 double lx = lMap.lastKey(); 1770 double hx = hMap.firstKey(); 1771 if (lx >= hx) { 1772 Double h = hMap.higherKey(lx); 1773 if (h != null) { 1774 hx = h; 1775 mh--; 1776 } else { 1777 Double l = lMap.lowerKey(hx); 1778 if (l != null) { 1779 lx = l; 1780 ml--; 1781 } 1782 } 1783 1784 } 1785 return new double[] {lMap.lastKey(), hMap.firstKey(), ml, mh}; 1786 } 1787 1788 static double[] outlierValuesList(final Dataset a, final IndexIterator it, int nl, int nh) { 1789 final List<Double> lList = new ArrayList<Double>(nl); 1790 final List<Double> hList = new 
ArrayList<Double>(nh); 1791 1792 double lx = Double.POSITIVE_INFINITY; 1793 double hx = Double.NEGATIVE_INFINITY; 1794 1795 while (it.hasNext()) { 1796 double x = a.getElementDoubleAbs(it.index); 1797 if (Double.isNaN(x)) { 1798 continue; 1799 } 1800 if (x < lx) { 1801 if (lList.size() == nl) { 1802 lList.remove(lx); 1803 } 1804 lList.add(x); 1805 lx = Collections.max(lList); 1806 } else if (x == lx) { 1807 if (lList.size() < nl) { 1808 lList.add(x); 1809 } 1810 } 1811 1812 if (x > hx) { 1813 if (hList.size() == nh) { 1814 hList.remove(hx); 1815 } 1816 hList.add(x); 1817 hx = Collections.min(hList); 1818 } else if (x == hx) { 1819 if (hList.size() < nh) { 1820 hList.add(x); 1821 } 1822 } 1823 } 1824 1825 nl = lList.size(); 1826 nh = hList.size(); 1827 1828 // Attempt to make values distinct 1829 if (lx >= hx) { 1830 Collections.sort(hList); 1831 for (double h : hList) { 1832 if (h > hx) { 1833 hx = h; 1834 break; 1835 } 1836 nh--; 1837 } 1838 if (lx >= hx) { 1839 Collections.sort(lList); 1840 Collections.reverse(lList); 1841 for (double l : lList) { 1842 if (l < lx) { 1843 lx = l; 1844 break; 1845 } 1846 nl--; 1847 } 1848 } 1849 } 1850 return new double[] {lx, hx, nl, nh}; 1851 } 1852 1853 /** 1854 * See {@link #covariance(Dataset a, Dataset b, boolean rowvar, boolean bias, Integer ddof)} with b = null, rowvar = true, bias = false and ddof = null. 1855 * @param a 1856 * @return covariance array of a 1857 */ 1858 public static Dataset covariance(final Dataset a) { 1859 return covariance(a, true, false, null); 1860 } 1861 1862 /** 1863 * See {@link #covariance(Dataset a, Dataset b, boolean rowvar, boolean bias, Integer ddof)} with b = null. 
1864 * @param a 1865 * @return covariance array of a 1866 * @since 2.0 1867 */ 1868 public static Dataset covariance(final Dataset a, 1869 boolean rowvar, boolean bias, Integer ddof) { 1870 return covariance(a, null, rowvar, bias, ddof); 1871 } 1872 1873 /** 1874 * See {@link #covariance(Dataset a, Dataset b, boolean rowvar, boolean bias, Integer ddof)} with b = null, rowvar = true, bias = false and ddof = null. 1875 * @param a 1876 * @return covariance array of a concatenated with b 1877 */ 1878 public static Dataset covariance(final Dataset a, final Dataset b) { 1879 return covariance(a, b, true, false, null); 1880 } 1881 1882 /** 1883 * Calculate the covariance matrix (array) of a concatenated with b. This 1884 * method is directly based on the implementation in numpy (cov). 1885 * @param a Array containing multiple variable and observations. Each row represents a variable, each column an observation. 1886 * @param b An extra set of variables and observations. Must be of same type as a and have a compatible shape. 1887 * @param rowvar When true (default), each row is a variable; when false each column is a variable. 1888 * @param bias Default normalisation is (N - 1) - N is number of observations. If set true, normalisation is (N). 1889 * @param ddof Default normalisation is (N - 1). If ddof is set, then normalisation is (N - ddof). 1890 * @return covariance array of a concatenated with b 1891 * @since 2.0 1892 */ 1893 public static Dataset covariance (final Dataset a, final Dataset b, 1894 boolean rowvar, boolean bias, Integer ddof) { 1895 1896 //Create a working copy of the dataset & check its rank. 
1897 Dataset vars = a.clone(); 1898 if (a.getRank() == 1) { 1899 vars.setShape(1, a.getShape()[0]); 1900 } 1901 1902 //1D of variables, so consider rows as variables 1903 if (vars.getShape()[0] == 1) { 1904 rowvar = true; 1905 } 1906 1907 //nr is the number of records; axis is index 1908 int nr, axis; 1909 if (rowvar) { 1910 nr = vars.getShape()[1]; 1911 axis = 0; 1912 } else { 1913 nr = vars.getShape()[0]; 1914 axis = 1; 1915 } 1916 1917 //Set the reduced degrees of freedom & normalisation factor 1918 if (ddof == null) { 1919 if (bias == false) { 1920 ddof = 1; 1921 } else { 1922 ddof = 0; 1923 } 1924 } 1925 double norm_fact = nr - ddof; 1926 if (norm_fact <= 0.) { 1927 //TODO Some sort of warning here? 1928 norm_fact = 0.; 1929 } 1930 1931 //Concatenate additional set of variables with main set 1932 if (b != null) { 1933 //Create a working copy of the dataset & check its rank. 1934 Dataset extraVars = b.clone(); 1935 if (b.getRank() == 1) { 1936 extraVars.setShape(1, a.getShape()[0]); 1937 } 1938 vars = DatasetUtils.concatenate(new Dataset[]{vars, extraVars}, axis); 1939 } 1940 1941 //Calculate deviations & covariance matrix 1942 Dataset varsMean = vars.mean(1-axis, false); 1943 //-vars & varsMean need same shape (this is a hack!) 1944 int[] meanShape = vars.getShape(); 1945 meanShape[1-axis] = 1; 1946 varsMean.setShape(meanShape); 1947 vars.isubtract(varsMean); 1948 Dataset cov; 1949 if (rowvar) { 1950 cov = Maths.divide(LinearAlgebra.dotProduct(vars, Maths.conjugate(vars.transpose())), norm_fact).squeeze(); 1951 } else { 1952 cov = Maths.divide(LinearAlgebra.dotProduct(vars.transpose(), Maths.conjugate(vars)), norm_fact).squeeze(); 1953 } 1954 return cov; 1955 } 1956}