Hi!
I just found a bug in a specific distribution where the logPDF is wrongly implemented ("*" instead of "+"). The proper way would be to check the computed value against a reference value, obtained for example from a symbolic computation system (such as Maple). However, we sometimes do not have the time to do so, hence the bug.
So I implemented a basic consistency checker. In my case, the logarithm of the PDF is obviously inconsistent with the logPDF. This is easy to implement since OT is object-oriented. I did it because I feel that it could save tens of hours of debugging for most distributions. A loss of accuracy in extreme cases cannot be found this way, but it can detect gross bugs.
import openturns as ot
import numpy as np


class UnivariateDistributionChecker:
    def __init__(self, distribution, sample_size=10, verbose=False, decimal=7):
        self.distribution = distribution
        self.verbose = verbose
        self.sample_size = sample_size
        self.sample = distribution.getSample(self.sample_size)
        self.decimal = decimal
        if self.verbose:
            print("sample=")
            print(self.sample)
        return

    def check_logPDF(self):
        """Check the consistency of logPDF against the log of the PDF."""
        if self.verbose:
            print("check_logPDF")
        logPDF1 = self.distribution.computeLogPDF(self.sample)
        logPDF2 = np.log(self.distribution.computePDF(self.sample))
        np.testing.assert_almost_equal(np.array(logPDF1), logPDF2, decimal=self.decimal)
        return

    def check_PDF(self):
        """Check the consistency of PDF against the gradient of the CDF."""
        if self.verbose:
            print("check_PDF")
        PDF1 = self.distribution.computePDF(self.sample)
        epsilon = ot.ResourceMap.GetAsScalar(
            "CenteredFiniteDifferenceGradient-DefaultEpsilon"
        )
        CDF1 = self.distribution.computeCDF(self.sample + epsilon)
        CDF2 = self.distribution.computeCDF(self.sample - epsilon)
        PDF2 = (CDF1 - CDF2) / (2.0 * epsilon)
        np.testing.assert_almost_equal(np.array(PDF1), np.array(PDF2), decimal=self.decimal)
        return

    def check_DDF(self):
        """Check the consistency of DDF against the gradient of the PDF."""
        if self.verbose:
            print("check_DDF")
        DDF1 = self.distribution.computeDDF(self.sample)
        epsilon = ot.ResourceMap.GetAsScalar(
            "CenteredFiniteDifferenceGradient-DefaultEpsilon"
        )
        PDF1 = self.distribution.computePDF(self.sample + epsilon)
        PDF2 = self.distribution.computePDF(self.sample - epsilon)
        DDF2 = (PDF1 - PDF2) / (2.0 * epsilon)
        np.testing.assert_almost_equal(np.array(DDF1), np.array(DDF2), decimal=self.decimal)
        return

    def check_ComplementaryCDF(self):
        """Check the consistency of the complementary CDF against the CDF."""
        if self.verbose:
            print("check_ComplementaryCDF")
        CCDF1 = self.distribution.computeComplementaryCDF(self.sample)
        CCDF2 = 1.0 - np.array(self.distribution.computeCDF(self.sample))
        np.testing.assert_almost_equal(np.array(CCDF1), CCDF2, decimal=self.decimal)
        return

    def check_MinimumVolumeIntervalWithMarginalProbability(self, probability=0.9):
        """Check the consistency of MinimumVolumeIntervalWithMarginalProbability against the CDF."""
        if self.verbose:
            print("check_MinimumVolumeIntervalWithMarginalProbability")
        interval = self.distribution.computeMinimumVolumeIntervalWithMarginalProbability(probability)[0]
        lower = interval.getLowerBound()[0]
        upper = interval.getUpperBound()[0]
        CDF_up = self.distribution.computeCDF(upper)
        CDF_low = self.distribution.computeCDF(lower)
        computed_probability = CDF_up - CDF_low
        np.testing.assert_almost_equal(probability, computed_probability, decimal=self.decimal)
        return

    def check_MinimumVolumeLevelSetWithThreshold(self, probability=0.9):
        """Check the consistency of MinimumVolumeLevelSetWithThreshold against the CDF."""
        if self.verbose:
            print("check_MinimumVolumeLevelSetWithThreshold")
        levelSet, threshold = self.distribution.computeMinimumVolumeLevelSetWithThreshold(probability)
        x = self.distribution.computeQuantile(1.0 - (1.0 - probability) / 2.0)
        computed_PDF = self.distribution.computePDF(x)
        np.testing.assert_almost_equal(threshold, computed_PDF, decimal=self.decimal)
        return

    def check_all(self):
        self.check_PDF()
        self.check_logPDF()
        self.check_DDF()
        self.check_ComplementaryCDF()
        self.check_MinimumVolumeIntervalWithMarginalProbability()
        self.check_MinimumVolumeLevelSetWithThreshold()
        return
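For a single distribution, usage is straightforward; here is a minimal sketch (ot.Normal() is just an arbitrary example, not the distribution from the bug report):

import openturns as ot

# Check one distribution with verbose output; ot.Normal() is only an example.
checker = UnivariateDistributionChecker(ot.Normal(), verbose=True, decimal=3)
checker.check_all()  # raises AssertionError as soon as one consistency check fails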
From there, it is easy to loop over all continuous univariate distributions:
factory_list = ot.DistributionFactory_GetUniVariateFactories()
n = len(factory_list)
for i in range(n):
    distribution = factory_list[i].build()
    name = distribution.getName()
    if distribution.isContinuous():
        print(i, name)
        checker = UnivariateDistributionChecker(distribution, decimal=3)
        try:
            checker.check_all()
        except AssertionError as err:
            print(name, ": error")
            # print(err)
PS: This is reported at: The log-PDF of the Pareto is wrong · Issue #1714 · openturns/openturns · GitHub
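For reference, a minimal sketch to exercise the checker on the Pareto distribution from that issue (assuming a default-constructed ot.Pareto(); the parameters used in the issue may differ) could be:

import openturns as ot

# Hypothetical reproduction sketch: run only the logPDF consistency check on Pareto.
pareto = ot.Pareto()
checker = UnivariateDistributionChecker(pareto, verbose=True, decimal=3)
checker.check_logPDF()  # expected to raise AssertionError if logPDF and log(PDF) disagree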