An updated version. Here I compare, for a target correlation Rtarget:
- the actual correlation R of the sample generated by the pseudo-LHS correlation-inducing algorithm. I see that R is different from Rtarget
- the exact correlation dist.R of the underlying normal-copula based joint distribution where the shape matrix of the normal copula is taken equal to Rtarget. I see that dist.R is different from Rtarget, but closely match R
- the correlation obtained using a sample of the previous distribution. Once again there is a close match with dist.R but not with Rtarget
- the shape matrix Rsol I should have used to have dist.R == Rtarget.
- the same as the first point, but with Rsol instead of Rtarget. This time R is almost equal to Rtarget
- the same as the second point, but with Rsol instead of Rtarget. Of course dist.R == Rtarget, by definition of Rsol
- the same as the third point, but with Rsol instead of Rtarget. The correlation is almost equal to Rtarget
So finding Rsol seems to be the solution; unfortunately, it is not always possible to obtain a given correlation using a normal copula (or indeed any copula), depending on the marginal distributions.
I also added a fully data-based solution to your problem: how to generate a new sample of a distribution given an initial sample, when the only things to keep are the marginal distributions and the correlation matrix (regardless of the actual copula). It is a five-line solution!
Here is the code:
import openturns as ot
# NOTE(review): presumably switches ComposedDistribution.getCorrelation() to the
# generic (numerical) covariance algorithm so it is accurate for arbitrary
# marginals -- confirm against the OpenTURNS ResourceMap documentation.
ot.ResourceMap.SetAsBool("ComposedDistribution-UseGenericCovarianceAlgorithm", True)
def lhsgeneral(pd, correlation, n):
    """Draw a size-n sample with marginals *pd* and (approximately) the given correlation.

    Pseudo-LHS scheme: generate an independent LHS design on the unit
    hypercube, map it to the standard normal space, re-correlate it with a
    Cholesky-based linear transformation, then push each column through the
    inverse CDF of its target marginal.

    pd          -- list of OpenTURNS univariate distributions (the marginals)
    correlation -- ot.CorrelationMatrix to impose in the normal space
    n           -- sample size
    Returns an ot.Sample of dimension len(pd) and size n.
    """
    dim = len(pd)
    target_corr = correlation
    uniform_marginals = [ot.Uniform(0.0, 1.0)] * dim
    normal_marginals = [ot.Normal(0.0, 1.0)] * dim
    # Independent LHS design on [0, 1]^dim
    design = ot.LHSExperiment(ot.ComposedDistribution(uniform_marginals), n)
    # design = ot.MonteCarloExperiment(ot.ComposedDistribution(uniform_marginals), n)
    unit_sample = design.generate()
    to_normal = ot.MarginalTransformationEvaluation(uniform_marginals, normal_marginals)
    gauss_sample = to_normal(unit_sample)
    # Undo the residual empirical correlation of the design, then impose the
    # target one: M = chol(target) * chol(empirical)^-1
    empirical_corr = gauss_sample.computeLinearCorrelation()
    chol_target = target_corr.computeCholesky()
    chol_empirical = empirical_corr.computeCholesky()
    mix = chol_target * chol_empirical.solveLinearSystem(ot.IdentityMatrix(dim))
    recolor = ot.LinearEvaluation([0.0] * dim, [0.0] * dim, mix.transpose())
    correlated_gauss = recolor(gauss_sample)
    # Map each standard-normal column to its target marginal
    to_target = ot.MarginalTransformationEvaluation(normal_marginals, pd)
    return to_target(correlated_gauss)
def findR(pd, Rtarget):
    """Find the normal-copula shape matrix inducing the correlation *Rtarget*.

    Solves, by least squares, for the CorrelationMatrix R such that
    ot.ComposedDistribution(pd, ot.NormalCopula(R)).getCorrelation() == Rtarget.

    pd      -- list of OpenTURNS univariate marginal distributions
    Rtarget -- ot.CorrelationMatrix, the correlation wanted for the joint
    Returns an ot.CorrelationMatrix (the shape matrix to use in NormalCopula).

    Note: such an R does not always exist; the optimizer then returns the
    best least-squares approximation within [-1, 1] entries.
    """
    dim = Rtarget.getDimension()
    # Number of free entries (strict lower triangle) of a correlation matrix
    n = (dim * (dim - 1)) // 2

    # Residual between the induced correlation and the target, both flattened
    # over the strict lower triangle.
    def residualFunction(X):
        R = ot.CorrelationMatrix(dim)
        index = 0
        for i in range(dim):
            for j in range(i):
                R[i, j] = X[index]
                # BUG FIX: the original used '++index', which in Python is a
                # double unary plus (a no-op), so index never advanced; it
                # only worked by accident when dim == 2 (n == 1).
                index += 1
        try:
            distribution = ot.ComposedDistribution(pd, ot.NormalCopula(R))
            Ractual = distribution.getCorrelation()
            residual = ot.Point(n)
            index = 0
            for i in range(dim):
                for j in range(i):
                    residual[index] = Ractual[i, j] - Rtarget[i, j]
                    index += 1
        except Exception:
            # Candidate R may not be a valid shape matrix (e.g. not positive
            # definite): penalize with a constant nonzero residual.
            residual = ot.Point(n, 1.0)
        return residual

    problem = ot.LeastSquaresProblem(ot.PythonFunction(n, n, residualFunction))
    problem.setBounds(ot.Interval([-1.0] * n, [1.0] * n))
    # Start from the target correlation itself, flattened
    startingPoint = ot.Point(n)
    index = 0
    for i in range(dim):
        for j in range(i):
            startingPoint[index] = Rtarget[i, j]
            index += 1
    algo = ot.CMinpack(problem)
    algo.setStartingPoint(startingPoint)
    algo.run()
    minimizer = algo.getResult().getOptimalPoint()
    # Unflatten the minimizer back into a correlation matrix
    Rsolution = ot.CorrelationMatrix(dim)
    index = 0
    for i in range(dim):
        for j in range(i):
            Rsolution[i, j] = minimizer[index]
            index += 1
    return Rsolution
if __name__ == '__main__':
    # Two marginals and a target correlation of -0.8 between them
    pd = [ot.Normal(0.0, 20.0), ot.Triangular(0.0, 100.0, 150.0)]
    correlation = ot.CorrelationMatrix(2, [1.0, -0.8, -0.8, 1.0])
    n = 10000000

    ##################################
    # Solution based on a pseudo-LHS #
    ##################################
    correlatedSamples = lhsgeneral(pd, correlation, n)
    print("(LHS) R=\n", correlatedSamples.computeLinearCorrelation())

    #####################################
    # Solution based on a normal copula #
    #####################################
    # With naive correlation: use the target directly as the shape matrix
    dist = ot.ComposedDistribution(pd, ot.NormalCopula(correlation))
    print("dist.R=\n", dist.getCorrelation())
    correlatedSamples = dist.getSample(n)
    print("(MC) R=\n", correlatedSamples.computeLinearCorrelation())

    # With adapted correlation: solve for the shape matrix that induces the target
    Rsol = findR(pd, correlation)
    print("Rsol=\n", Rsol)
    correlatedSamples = lhsgeneral(pd, Rsol, n)
    print("(LHS) R=\n", correlatedSamples.computeLinearCorrelation())
    dist = ot.ComposedDistribution(pd, ot.NormalCopula(Rsol))
    print("dist.R=\n", dist.getCorrelation())
    correlatedSamples = dist.getSample(n)
    print("(MC) R=\n", correlatedSamples.computeLinearCorrelation())

    ###################
    # Export the data #
    ###################
    correlatedSamples.exportToCSVFile("data.csv")

    ##################################################################
    # Load the data as if they were generated by an external process #
    ##################################################################
    data = ot.Sample.ImportFromCSVFile("data.csv")
    # Identify the associated normal copula
    copula = ot.NormalCopulaFactory().build(data)
    # Identify the marginal distributions
    pd = [ot.HistogramFactory().build(data.getMarginal(i)) for i in range(data.getDimension())]
    # Build the joint distribution
    dist = ot.ComposedDistribution(pd, copula)
    # Generate a new sample
    correlatedSamples = dist.getSample(n)
    print("(MC) R=\n", correlatedSamples.computeLinearCorrelation())
Here is the result:
(LHS) R=
[[ 1 -0.793146 ]
[ -0.793146 1 ]]
dist.R=
[[ 1 -0.793182 ]
[ -0.793182 1 ]]
(MC) R=
[[ 1 -0.793033 ]
[ -0.793033 1 ]]
Rsol=
[[ 1 -0.806877 ]
[ -0.806877 1 ]]
(LHS) R=
[[ 1 -0.799999 ]
[ -0.799999 1 ]]
dist.R=
[[ 1 -0.8 ]
[ -0.8 1 ]]
(MC) R=
[[ 1 -0.800029 ]
[ -0.800029 1 ]]
(MC) R=
[[ 1 -0.800066 ]
[ -0.800066 1 ]]
Cheers
Régis