//                                               -*- C++ -*-
/**
 *  @file  HypothesisTest.cxx
 *  @brief HypothesisTest implements statistical tests
 *
 *  (C) Copyright 2005-2012 EDF-EADS-Phimeca
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public
 *  License as published by the Free Software Foundation; either
 *  version 2.1 of the License.
 *
 *  This library is distributed in the hope that it will be useful
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public
 *  License along with this library; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
 *
 *  @author: $LastChangedBy: lebrun $
 *  @date:   $LastChangedDate: 2012-04-17 11:31:33 +0200 (Tue, 17 Apr 2012) $
 *  Id:      $Id: HypothesisTest.cxx 2482 2012-04-17 09:31:33Z lebrun $
 */
#include <cmath>
#include <fstream>
#include "HypothesisTest.hxx"
#include "HypothesisTest.hxx"
#include "NumericalPoint.hxx"
#include "Path.hxx"
#include "Log.hxx"
#include "ResourceMap.hxx"
#include "LinearModelFactory.hxx"
#include "Exception.hxx"
#include "Os.hxx"
#include "OTconfig.hxx"

BEGIN_NAMESPACE_OPENTURNS




/* Default constructor: performs no initialization work */
HypothesisTest::HypothesisTest()
{
}

/* Independence test between 2 scalar samples for discrete distributions.
 * Both samples must have dimension 1, otherwise an InvalidArgumentException is thrown.
 * The computation is delegated to the R test named "TwoSampleChiSquared". */
TestResult HypothesisTest::ChiSquared(const NumericalSample & firstSample,
                                      const NumericalSample & secondSample,
                                      const NumericalScalar level)
{
  const Bool bothScalar = (firstSample.getDimension() == 1) && (secondSample.getDimension() == 1);
  if (!bothScalar)
    {
      throw InvalidArgumentException(HERE) << "Error: the ChiSquared test can be performed only between two 1D samples.";
    }
  const String testName("TwoSampleChiSquared");
  return RunTwoSamplesRTest(firstSample, secondSample, level, testName);
}

/* Independence Pearson test between 2 scalar samples which form a gaussian vector: it tests the linear relation.
 * Both samples must have dimension 1, otherwise an InvalidArgumentException is thrown.
 * The computation is delegated to the R test named "TwoSamplePearson". */
TestResult HypothesisTest::Pearson(const NumericalSample & firstSample,
                                   const NumericalSample & secondSample,
                                   const NumericalScalar level)
{
  const Bool bothScalar = (firstSample.getDimension() == 1) && (secondSample.getDimension() == 1);
  if (!bothScalar)
    {
      throw InvalidArgumentException(HERE) << "Error: the Pearson test can be performed only between two 1D samples.";
    }
  const String testName("TwoSamplePearson");
  return RunTwoSamplesRTest(firstSample, secondSample, level, testName);
}

/* Smirnov test: checks whether two scalar samples (of sizes not necessarily equal) follow the same distribution (only for continuous distributions).
 * Both samples must have dimension 1, otherwise an InvalidArgumentException is thrown.
 * The computation is delegated to the R test named "TwoSampleSmirnov". */
TestResult HypothesisTest::Smirnov(const NumericalSample & firstSample,
                                   const NumericalSample & secondSample,
                                   const NumericalScalar level)
{
  const Bool bothScalar = (firstSample.getDimension() == 1) && (secondSample.getDimension() == 1);
  if (!bothScalar)
    {
      throw InvalidArgumentException(HERE) << "Error: the Smirnov test can be performed only between two 1D samples.";
    }
  const String testName("TwoSampleSmirnov");
  return RunTwoSamplesRTest(firstSample, secondSample, level, testName);
}

/* Spearman test between 2 scalar samples: it tests the monotonous relation (only for continuous distributions).
 * Both samples must have dimension 1, otherwise an InvalidArgumentException is thrown.
 * The computation is delegated to the R test named "TwoSampleSpearman". */
TestResult HypothesisTest::Spearman(const NumericalSample & firstSample,
                                    const NumericalSample & secondSample,
                                    const NumericalScalar level)
{
  const Bool bothScalar = (firstSample.getDimension() == 1) && (secondSample.getDimension() == 1);
  if (!bothScalar)
    {
      throw InvalidArgumentException(HERE) << "Error: the Spearman test can be performed only between two 1D samples.";
    }
  const String testName("TwoSampleSpearman");
  return RunTwoSamplesRTest(firstSample, secondSample, level, testName);
}

/* Generic invocation of a R script for executing a test between two 1D samples.
 *
 * The two samples are stored into temporary files, then a R command file is
 * generated: it loads rotRPackage, reads both samples, calls
 * computeTest<testName>(firstSample, secondSample, level) and writes the test
 * name, the test result, the threshold and the p-value (one per line) into a
 * temporary result file. The command file is executed through the R executable
 * given by the "R-executable-command" resource and the result file is parsed
 * to build the returned TestResult.
 *
 * Throws NotYetImplementedException if R_EXECUTABLE is not defined, and
 * InternalException if the R command returns a non-zero code or if the four
 * values cannot be read from the result file. The temporary files are removed
 * before any of the InternalException is thrown.
 */
TestResult HypothesisTest::RunTwoSamplesRTest(const NumericalSample & firstSample,
                                              const NumericalSample & secondSample,
                                              const NumericalScalar level,
                                              const String & testName)
{
#ifndef R_EXECUTABLE
  // Fail before any temporary file is created, as nothing would remove it afterwards
  throw NotYetImplementedException(HERE) << "HypothesisTest::RunTwoSamplesRTest() need R";
#endif
  const String firstDataFileName(firstSample.storeToTemporaryFile());
  const String secondDataFileName(secondSample.storeToTemporaryFile());
  const String resultFileName(Path::BuildTemporaryFileName("RResult.txt.XXXXXX"));
  const String commandFileName(Path::BuildTemporaryFileName("RCmd.R.XXXXXX"));
  std::ofstream cmdFile(commandFileName.c_str(), std::ios::out);
  // Write the level with enough digits to be read back by R without loss
  cmdFile.precision(17);
  // Fill-in the command file
  cmdFile << "library(rotRPackage)" << std::endl;
  cmdFile << "options(digits=17)" << std::endl;
  cmdFile << "options(warn=-1)" << std::endl;
  cmdFile << "firstSample <- data.matrix(read.table(\"" << firstDataFileName << "\"))" << std::endl;
  cmdFile << "secondSample <- data.matrix(read.table(\"" << secondDataFileName << "\"))" << std::endl;
  cmdFile << "res <- computeTest" << testName;
  cmdFile << "(firstSample, secondSample, " << level << ")" << std::endl;
  cmdFile << "f <- file(\"" << resultFileName << "\",\"wt\")" << std::endl;
  cmdFile << "cat(res$test, res$testResult, res$threshold, res$pValue, sep=\"\\n\", file=f)" << std::endl;
  cmdFile << "close(f)" << std::endl;
  cmdFile.close();
  OSS systemCommand;
  systemCommand << ResourceMap::Get("R-executable-command") << " --no-save --silent < \"" << commandFileName << "\"" << Os::GetDeleteCommandOutput();
  const int returnCode(Os::ExecuteCommand(String(systemCommand).c_str()));

  // Parse result file: test type, test result, test threshold, test value
  String testType;
  Bool testResult = false;
  NumericalScalar pThreshold = 0.0;
  NumericalScalar pValue = 0.0;
  Bool resultFileParsed = false;
  if (returnCode == 0)
    {
      std::ifstream resultFile(resultFileName.c_str(), std::ios::in);
      resultFile >> testType;
      resultFile >> testResult;
      resultFile >> pThreshold;
      resultFile >> pValue;
      // The fail bit is set if the file could not be opened or if any of the four reads failed
      resultFileParsed = !resultFile.fail();
    } // The result stream is closed here, before the file is removed

  // Clean-up everything, whatever the outcome of the execution (remove() returns a non-zero value on failure)
  if (remove(firstDataFileName.c_str()) != 0) LOGWARN(OSS() << "Warning: cannot remove file " << firstDataFileName);
  if (remove(secondDataFileName.c_str()) != 0) LOGWARN(OSS() << "Warning: cannot remove file " << secondDataFileName);
  if (remove(resultFileName.c_str()) != 0) LOGWARN(OSS() << "Warning: cannot remove file " << resultFileName);
  if (remove(commandFileName.c_str()) != 0) LOGWARN(OSS() << "Warning: cannot remove file " << commandFileName);

  if (returnCode != 0) throw InternalException(HERE) << "Error: unable to execute the system command " << String(systemCommand) << " returned code is " << returnCode;
  if (!resultFileParsed) throw InternalException(HERE) << "Error: unable to read the result of the test " << testName << " from the file " << resultFileName;

  return TestResult(testType, testResult, pValue, pThreshold);
}

/* Independence Pearson test between 2 samples: firstSample of dimension n and secondSample of dimension 1.
 * If firstSample[i] is the numerical sample extracted from firstSample (ith coordinate of each point of the
 * numerical sample), PartialPearson performs the Independence Pearson test simultaneously on firstSample[i]
 * and secondSample, for i in the selection. For all i, it is supposed that the couple
 * (firstSample[i], secondSample) is issued from a gaussian vector.
 * An InvalidArgumentException is thrown if the selection is rejected by Indices::check() against the
 * largest valid component index of firstSample. */
HypothesisTest::TestResultCollection HypothesisTest::PartialPearson(const NumericalSample & firstSample,
                                                                    const NumericalSample & secondSample,
                                                                    const Indices & selection,
                                                                    const NumericalScalar level)
{
  const UnsignedLong largestIndex = firstSample.getDimension() - 1;
  const Bool validSelection = selection.check(largestIndex);
  if (!validSelection)
    {
      throw InvalidArgumentException(HERE) << "Error: invalid selection, repeated indices or values out of bound";
    }
  const String testName("PartialPearson");
  return RunTwoSamplesASelectionRTest(firstSample, secondSample, selection, level, testName);
}

/* Regression test between 2 samples: samplePred (the first sample, of dimension n) and sampleLab
 * (the second sample, of dimension 1). If samplePred[i] is the numerical sample extracted from samplePred
 * (ith coordinate of each point of the numerical sample), PartialRegression performs the Regression test
 * simultaneously on all samplePred[i] and sampleLab, for i in the selection. The Regression test tests if
 * the regression model between two scalar numerical samples is significant. It is based on the deviation
 * analysis of the regression. The Fisher distribution is used.
 * An InvalidArgumentException is thrown if the selection is rejected by Indices::check() against the
 * largest valid component index of samplePred. */
HypothesisTest::TestResultCollection HypothesisTest::PartialRegression(const NumericalSample & samplePred,
                                                                       const NumericalSample & sampleLab,
                                                                       const Indices & selection,
                                                                       const NumericalScalar level)
{
  if (!selection.check(samplePred.getDimension() - 1)) throw InvalidArgumentException(HERE) << "Error: invalid selection, repeated indices or values out of bound";
  // The selection has been validated against samplePred, so samplePred must be forwarded as the first
  // sample (the one the selection applies to), in the same order as PartialPearson and PartialSpearman
  return RunTwoSamplesASelectionRTest(samplePred, sampleLab, selection, level, "PartialRegression");
}

/* Spearman test between 2 samples: firstSample of dimension n and secondSample of dimension 1.
 * If firstSample[i] is the numerical sample extracted from firstSample (ith coordinate of each point of the
 * numerical sample), PartialSpearman performs the Independence Spearman test simultaneously on firstSample[i]
 * and secondSample, for i in the selection.
 * An InvalidArgumentException is thrown if the selection is rejected by Indices::check() against the
 * largest valid component index of firstSample. */
HypothesisTest::TestResultCollection HypothesisTest::PartialSpearman(const NumericalSample & firstSample,
                                                                     const NumericalSample & secondSample,
                                                                     const Indices & selection,
                                                                     const NumericalScalar level)
{
  const UnsignedLong largestIndex = firstSample.getDimension() - 1;
  const Bool validSelection = selection.check(largestIndex);
  if (!validSelection)
    {
      throw InvalidArgumentException(HERE) << "Error: invalid selection, repeated indices or values out of bound";
    }
  const String testName("PartialSpearman");
  return RunTwoSamplesASelectionRTest(firstSample, secondSample, selection, level, testName);
}

/* Independence Pearson test between 2 samples: firstSample of dimension n and secondSample of dimension 1.
 * If firstSample[i] is the numerical sample extracted from firstSample (ith coordinate of each point of the
 * numerical sample), FullPearson performs the Independence Pearson test simultaneously on all firstSample[i]
 * and secondSample. For all i, it is supposed that the couple (firstSample[i], secondSample) is issued
 * from a gaussian vector.
 * It is PartialPearson applied to the selection of all the components of firstSample. */
HypothesisTest::TestResultCollection HypothesisTest::FullPearson(const NumericalSample & firstSample,
                                                                 const NumericalSample & secondSample,
                                                                 const NumericalScalar level)
{
  // Build the selection (0, 1, ..., n - 1) covering every component of firstSample
  const UnsignedLong componentNumber = firstSample.getDimension();
  Indices allComponents(componentNumber);
  UnsignedLong index = 0;
  while (index < componentNumber)
    {
      allComponents[index] = index;
      ++index;
    }
  return PartialPearson(firstSample, secondSample, allComponents, level);
}

/* Regression test between 2 samples: samplePred (the first sample, of dimension n) and sampleLab
 * (the second sample, of dimension 1). If samplePred[i] is the numerical sample extracted from samplePred
 * (ith coordinate of each point of the numerical sample), FullRegression performs the Regression test
 * simultaneously on all samplePred[i] and sampleLab. The Regression test tests if the regression model
 * between two scalar numerical samples is significant. It is based on the deviation analysis of the
 * regression. The Fisher distribution is used.
 * It is PartialRegression applied to the selection of all the components of samplePred. */
HypothesisTest::TestResultCollection HypothesisTest::FullRegression(const NumericalSample & samplePred,
                                                                    const NumericalSample & sampleLab,
                                                                    const NumericalScalar level)
{
  // Build the selection (0, 1, ..., n - 1) covering every component of samplePred
  const UnsignedLong componentNumber = samplePred.getDimension();
  Indices allComponents(componentNumber);
  UnsignedLong index = 0;
  while (index < componentNumber)
    {
      allComponents[index] = index;
      ++index;
    }
  return PartialRegression(samplePred, sampleLab, allComponents, level);
}

/* Spearman test between 2 samples: firstSample of dimension n and secondSample of dimension 1.
 * If firstSample[i] is the numerical sample extracted from firstSample (ith coordinate of each point of the
 * numerical sample), FullSpearman performs the Independence Spearman test simultaneously on all
 * firstSample[i] and secondSample.
 * It is PartialSpearman applied to the selection of all the components of firstSample. */
HypothesisTest::TestResultCollection HypothesisTest::FullSpearman(const NumericalSample & firstSample,
                                                                  const NumericalSample & secondSample,
                                                                  const NumericalScalar level)
{
  // Build the selection (0, 1, ..., n - 1) covering every component of firstSample
  const UnsignedLong componentNumber = firstSample.getDimension();
  Indices allComponents(componentNumber);
  UnsignedLong index = 0;
  while (index < componentNumber)
    {
      allComponents[index] = index;
      ++index;
    }
  return PartialSpearman(firstSample, secondSample, allComponents, level);
}

/* Generic invocation of a R script for testing the partial correlation between two samples.
 *
 * The two samples and the selection (shifted by one, as R indices start at 1)
 * are stored into temporary files, then a R command file is generated: it
 * loads rotRPackage, reads the three files, calls
 * computeTest<testName>(firstSample, secondSample, selection, level) and
 * writes the test name, the test results, the threshold and the p-values into
 * a temporary result file. The command file is executed through the R
 * executable given by the "R-executable-command" resource and the result file
 * is parsed to build the returned collection, which holds
 * selection.getSize() + 1 test results.
 *
 * Throws NotYetImplementedException if R_EXECUTABLE is not defined, and
 * InternalException if the R command returns a non-zero code or if nothing
 * can be read from the result file. The temporary files are removed before
 * any of the InternalException is thrown.
 */
HypothesisTest::TestResultCollection HypothesisTest::RunTwoSamplesASelectionRTest(const NumericalSample & firstSample,
                                                                                  const NumericalSample & secondSample,
                                                                                  const Indices & selection,
                                                                                  const NumericalScalar level,
                                                                                  const String & testName)
{
#ifndef R_EXECUTABLE
  // Fail before any temporary file is created, as nothing would remove it afterwards
  throw NotYetImplementedException(HERE) << "HypothesisTest::RunTwoSamplesASelectionRTest() need R";
#endif
  const String firstDataFileName(firstSample.storeToTemporaryFile());
  const String secondDataFileName(secondSample.storeToTemporaryFile());
  const UnsignedLong size(selection.getSize());
  // The selection is transmitted as a sample of 1-based indices
  NumericalSample selectionSample(size, 1);
  for (UnsignedLong i = 0; i < size; ++i) selectionSample[i][0] = selection[i] + 1.0;
  const String selectionFileName(selectionSample.storeToTemporaryFile());
  const String resultFileName(Path::BuildTemporaryFileName("RResult.txt.XXXXXX"));
  const String commandFileName(Path::BuildTemporaryFileName("RCmd.R.XXXXXX"));
  std::ofstream cmdFile(commandFileName.c_str(), std::ios::out);
  // Write the level with enough digits to be read back by R without loss
  cmdFile.precision(17);
  // Fill-in the command file
  cmdFile << "library(rotRPackage)" << std::endl;
  cmdFile << "options(digits=17)" << std::endl;
  cmdFile << "options(warn=-1)" << std::endl;
  cmdFile << "firstSample <- data.matrix(read.table(\"" << firstDataFileName << "\"))" << std::endl;
  cmdFile << "secondSample <- data.matrix(read.table(\"" << secondDataFileName << "\"))" << std::endl;
  cmdFile << "selection <- data.matrix(read.table(\"" << selectionFileName << "\"))" << std::endl;
  cmdFile << "res <- computeTest" << testName;
  cmdFile << "(firstSample, secondSample, selection, " << level << ")" << std::endl;
  cmdFile << "f <- file(\"" << resultFileName << "\",\"wt\")" << std::endl;
  cmdFile << "cat(res$test, res$testResult, res$threshold, res$pValue, sep=\"\\n\", file=f)" << std::endl;
  cmdFile << "close(f)" << std::endl;
  cmdFile.close();
  OSS systemCommand;
  systemCommand << ResourceMap::Get("R-executable-command") << " --no-save --silent < \"" << commandFileName << "\"" << Os::GetDeleteCommandOutput();
  const int returnCode(Os::ExecuteCommand(systemCommand));

  // Parse result file: test type, test results, test threshold, test values
  String testType;
  Indices results(size + 1);
  NumericalScalar pThreshold = 0.0;
  NumericalPoint pValues(size + 1);
  Bool resultFileReadable = false;
  Bool resultFileComplete = false;
  if (returnCode == 0)
    {
      std::ifstream resultFile(resultFileName.c_str(), std::ios::in);
      // First, test type
      resultFile >> testType;
      // The fail bit is set here if the file could not be opened or is empty
      resultFileReadable = !resultFile.fail();
      // Second, test results
      for (UnsignedLong i = 0; i <= size; ++i)
        {
          resultFile >> results[i];
        }
      // Third, test threshold
      resultFile >> pThreshold;
      // Fourth, test values
      for (UnsignedLong i = 0; i <= size; ++i)
        {
          resultFile >> pValues[i];
        }
      resultFileComplete = !resultFile.fail();
    } // The result stream is closed here, before the file is removed

  // Clean-up everything, whatever the outcome of the execution (remove() returns a non-zero value on failure)
  if (remove(firstDataFileName.c_str()) != 0) LOGWARN(OSS() << "Warning: cannot remove file " << firstDataFileName);
  if (remove(secondDataFileName.c_str()) != 0) LOGWARN(OSS() << "Warning: cannot remove file " << secondDataFileName);
  if (remove(selectionFileName.c_str()) != 0) LOGWARN(OSS() << "Warning: cannot remove file " << selectionFileName);
  if (remove(resultFileName.c_str()) != 0) LOGWARN(OSS() << "Warning: cannot remove file " << resultFileName);
  if (remove(commandFileName.c_str()) != 0) LOGWARN(OSS() << "Warning: cannot remove file " << commandFileName);

  if (returnCode != 0) throw InternalException(HERE) << "Error: unable to execute the system command " << String(systemCommand) << " returned code is " << returnCode;
  if (!resultFileReadable) throw InternalException(HERE) << "Error: unable to read the result of the test " << testName << " from the file " << resultFileName;
  // NOTE(review): the number of values written by the R function is not visible from here, so a short
  // read is only reported and not treated as an error - confirm against rotRPackage
  if (!resultFileComplete) LOGWARN(OSS() << "Warning: some values of the test " << testName << " could not be read from the result file");

  // Then, build the collection of results
  TestResultCollection resultCollection(size + 1);
  for (UnsignedLong i = 0; i <= size; ++i)
    {
      resultCollection[i] = TestResult(testType, results[i] == 1, pValues[i], pThreshold);
    }
  return resultCollection;
}

END_NAMESPACE_OPENTURNS
