% NOTE:
% This code is a sample implementation of the method proposed in
% the article entitled: "A Wasserstein Distance-based Double-Bootstrap 
% Method for Comparing Spatial Simulation Output". For the sake of
% simplicity and clarity, this sample code uses linear programming 
% for computing the Wasserstein distance and is designed for comparing two
% samples from bivariate Normal distributions. The use of linear 
% programming slows down the process (the example in this code takes about 
% a minute to run). For faster execution time and higher dimensions, 
% the method for computing the Wasserstein distance can be replaced.

clc
clear

% MATLAB resets its random number generator at startup, so rand, randi and
% randn (and the functions built on them) return the same sequences of
% numbers in every new MATLAB session. Leave the following line commented
% out to keep runs repeatable; uncomment it to obtain a different seed
% (and therefore different samples) on every run.
% rng("shuffle") % creates a different seed each time

% Parameters 
% Dimensions of the grid
dim1_numCells = 5;
dim2_numCells = 5;
% Parameters for the Simplex Algorithm
maxit = 4000;
tol = 1e-4;
% Parameters for the double bootstrapping method. For real-world
% applications it is recommended to run each bootstrap for at least 100
% iterations
testConfidenceLevel = 95;
numIteration_FirstBootstrap = 15;
numIteration_SecondBootstrap = 20;
%*******************************

% The two samples to be compared are drawn from zero-mean bivariate Normal
% distributions that differ only in the variance of the first coordinate
% (1 versus 0.8). mvnrnd returns one observation per row.
mu = [0 0];
sigma = [1 0; 0 1];
sample1 = mvnrnd(mu,sigma,5000);
% Uncomment the following line to visualize the first sample
% plot(sample1(:,1),sample1(:,2),'+')

sigma = [0.8 0; 0 1];
sample2 = mvnrnd(mu,sigma,5000);
% Uncomment the following two lines to visualize the second sample
% figure
% plot(sample2(:,1),sample2(:,2),'+')

% Transpose the two samples so that each column is one observation
sample1 = sample1';
sample2 = sample2';

% Calculate the Wasserstein distance between the original samples
WassersteinDistance_OriginalSamples = WassersteinDistance_2DGrid(sample1,sample2,dim1_numCells,dim2_numCells,maxit,tol);

% This script treats a negative distance as "no optimal solution was found
% within maxit iterations" (the bootstrap loop discards such values). The
% original samples are fixed, so retrying cannot help here; stop instead of
% letting an invalid distance flow into the final test statistic.
if WassersteinDistance_OriginalSamples < 0
    error('WassersteinDoubleBootstrap:noOptimalSolution', ...
        ['The Wasserstein distance between the original samples could not be ', ...
         'computed (negative value returned). Increase maxit (currently %d) ', ...
         'or relax tol (currently %g).'], maxit, tol);
end

%% The Wasserstein Distance-Based Double-Bootstrap Method

% Step 1: Combine both samples into a single sample (one observation per
% column) from which every bootstrap replicate is drawn with equal weights.
n1 = size(sample1,2);
n2 = size(sample2,2);
CombinedSample = [sample1,sample2];
SelectionProb = ones(1,n1+n2);
Indices = 1:n1+n2;

% Safety limit: maximum number of discarded (non-converged) replicates
% tolerated per outer iteration. Without it, a solver that fails
% consistently would keep the inner loop running forever.
maxFailedReplicates = 10*numIteration_FirstBootstrap;

% The double-bootstrapping procedure
% Preallocate the per-outer-iteration results (row vectors).
VarHatWassersteinDistance = zeros(1,numIteration_SecondBootstrap);
t_likeStatistic = zeros(1,numIteration_SecondBootstrap);
for j = 1:numIteration_SecondBootstrap
    
    WassersteinDistance = zeros(1,numIteration_FirstBootstrap);
    i = 0;                      % number of accepted replicates so far
    numFailedReplicates = 0;    % number of discarded replicates so far
    while i < numIteration_FirstBootstrap

        % Step 2: select two samples of n1 and n2 observations with replacement
        SampledIndices1 = randsample(Indices,n1,true,SelectionProb);
        SampledIndices2 = randsample(Indices,n2,true,SelectionProb);
        
        GeneratedSample1 = CombinedSample(:,SampledIndices1);
        GeneratedSample2 = CombinedSample(:,SampledIndices2);

        % Step 3: Calculate the Wasserstein distance between the generated samples 1 and 2
        candidateDistance = WassersteinDistance_2DGrid(GeneratedSample1,GeneratedSample2,dim1_numCells,dim2_numCells,maxit,tol);
        
        % A negative distance means an optimal solution was not found
        % within the number of iterations specified. Such a replicate is
        % discarded and the iteration is repeated with a fresh resample.
        if candidateDistance < 0
            numFailedReplicates = numFailedReplicates + 1;
            if numFailedReplicates > maxFailedReplicates
                error('WassersteinDoubleBootstrap:tooManyFailedReplicates', ...
                    ['More than %d bootstrap replicates failed to reach an optimal ', ...
                     'solution in outer iteration %d. Increase maxit (currently %d) ', ...
                     'or relax tol (currently %g).'], ...
                    maxFailedReplicates, j, maxit, tol);
            end
            continue
        end

        % Accept the replicate
        i = i+1;
        WassersteinDistance(i) = candidateDistance;

    end

    % Step 4: Calculate the variance of the Wasserstein distances
    VarHatWassersteinDistance(j) = var(WassersteinDistance);
    
    % Step 5: calculate a t-like statistic
    % NOTE(review): the numerator is the first accepted replicate of this
    % outer iteration, which is also part of the variance estimate above.
    % Kept as in the original script - confirm against the article.
    t_likeStatistic(j) = WassersteinDistance(1)/sqrt(VarHatWassersteinDistance(j));
end

% Steps 6 and 7: the cutoff for the t-like statistic is the percentile of
% the bootstrap t-like statistics at the chosen confidence level (e.g. 95).
ThresholdCutoffValue = prctile(t_likeStatistic,testConfidenceLevel);

% Step 8: t-like statistic of the original samples, scaled by the standard
% deviation estimated in the first outer bootstrap iteration.
% NOTE(review): only the first variance estimate is used here; the other
% entries of VarHatWassersteinDistance are ignored - confirm this is intended.
referenceStdDev = sqrt(VarHatWassersteinDistance(1));
t_likeStatOriginalData = WassersteinDistance_OriginalSamples/referenceStdDev;

% Decision: the difference is significant (1) when the original statistic
% exceeds the cutoff, i.e. when their ratio is greater than 1; otherwise 0.
Ratio = t_likeStatOriginalData / ThresholdCutoffValue;
SignificantDifference = double(Ratio > 1);

%% Display the outcome of the test in the Command Window
disp('1 indicates "Reject" and 0 indicates "Fail to Reject".');
% The semicolon is omitted on purpose so that the result is printed.
testOutcome = SignificantDifference