kernel_density_estimation.js

import interquartileRange from "./interquartile_range.js";
import stddev from "./sample_standard_deviation.js";

const SQRT_2PI = Math.sqrt(2 * Math.PI);

/**
 * [常用核函数](https://en.wikipedia.org/wiki/Kernel_(statistics)#Kernel_functions_in_common_use)
 * @private
 */
const kernels = {
    /**
     * 高斯核函数
     * @private
     */
    gaussian: function (u) {
        return Math.exp(-0.5 * u * u) / SQRT_2PI;
    }
};

/**
 * 常用带宽选择方法
 * @private
 */
const bandwidthMethods = {
    /**
     * ["正态参考分布"经验法则](https://stat.ethz.ch/R-manual/R-devel/library/MASS/html/bandwidth.nrd.html),
     * [Silverman经验法则](https://en.wikipedia.org/wiki/Kernel_density_estimation#A_rule-of-thumb_bandwidth_estimator)的常用实现
     * @private
     */
    nrd: function (x) {
        let s = stddev(x);
        const iqr = interquartileRange(x);
        if (typeof iqr === "number") {
            s = Math.min(s, iqr / 1.34);
        }
        return 1.06 * s * Math.pow(x.length, -0.2);
    }
};

/**
 * [核密度估计](https://en.wikipedia.org/wiki/Kernel_density_estimation)是
 * 一种通过样本数据估计潜在概率分布形态的非参数方法
 *
 * @name kernelDensityEstimation
 * @param X 样本值数组
 * @param kernel 核函数。若提供函数,应返回非负值且积分为1。默认为高斯核
 * @param bandwidthMethod 带宽选择方法或固定带宽值。默认为"nrd",即常用的["正态参考分布"经验法则](https://stat.ethz.ch/R-manual/R-devel/library/MASS/html/bandwidth.nrd.html)
 * @returns {Function} 估计得到的[概率密度函数](https://en.wikipedia.org/wiki/Probability_density_function),时间复杂度为O(X.length)
 */
function kernelDensityEstimation(X, kernel, bandwidthMethod) {
    let kernelFn;
    if (kernel === undefined) {
        kernelFn = kernels.gaussian;
    } else if (typeof kernel === "string") {
        if (!kernels[kernel]) {
            throw new Error('未知的内核函数"' + kernel + '"');
        }
        kernelFn = kernels[kernel];
    } else {
        kernelFn = kernel;
    }

    let bandwidth;
    if (typeof bandwidthMethod === "undefined") {
        bandwidth = bandwidthMethods.nrd(X);
    } else if (typeof bandwidthMethod === "string") {
        if (!bandwidthMethods[bandwidthMethod]) {
            throw new Error('未知的带宽选择方法"' + bandwidthMethod + '"');
        }
        bandwidth = bandwidthMethods[bandwidthMethod](X);
    } else {
        bandwidth = bandwidthMethod;
    }

    return function (x) {
        let i = 0;
        let sum = 0;
        for (i = 0; i < X.length; i++) {
            sum += kernelFn((x - X[i]) / bandwidth);
        }
        return sum / bandwidth / X.length;
    };
}

export default kernelDensityEstimation;